################## Packages needed:
library(tidyverse)
library(estimatr)
library(srvyr)
library(readstata13)
library(haven)
library(readxl)

############################################### Various datasets used ########################################### 

####### ANES time series through 2016
# from: https://electionstudies.org/data-center/anes-time-series-cumulative-data-file/ 
ANES <- read.dta13("anes_timeseries_cdf_stata13.dta")

####### ANES 2020 
# from: https://electionstudies.org/data-center/2020-time-series-study/ 
ANES_20 <- read.dta13("anes_timeseries_2020_stata_20220210.dta")

######## CES (aka CCES) 2006-08 time series
# from: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi%3A10.7910/DVN/II2DB6 
cces <- read_dta(file = "cumulative_2006-2021.dta")
#

########## CES 2010, 2012, 2014, 2016, 2018, 2020 only (b/c individual years have more detailed data than time series file)
# from: https://dataverse.harvard.edu/dataset.xhtml?persistentId=hdl:1902.1/17705 
cces_10 <- read_dta("cces_2010_common_validated.dta")
# from: https://dataverse.harvard.edu/dataset.xhtml?persistentId=hdl:1902.1/21447 
cces_12 <- read_dta(file = "CCES12_Common_VV.dta")
# from: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi%3A10.7910/DVN/XFXJVY 
cces_14 <- read_dta(file = "CCES14_Common_Content_Validated.dta")
# from: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi%3A10.7910/DVN/GDF6Z0 
cces_16 <- read_dta(file = "CCES16_Common_OUTPUT_Feb2018_VV.dta")
# from: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi%3A10.7910/DVN/ZSBZ7K 
cces_18 <- read_dta(file = "cces18_common_vv.dta")
# from: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi%3A10.7910/DVN/E9N6PH  
cces_20 <- read_dta("CES20_Common_OUTPUT_vv.dta")
# 

######### GSS time series
# from: https://gssdataexplorer.norc.org/gss_data 
gss <- read_dta(file = "gss7221_r1a.dta")
#

####### County & zip code population density codes (i.e., rural-urban; from US Economic Research Service)
# from: https://www.ers.usda.gov/data-products/rural-urban-continuum-codes.aspx 
NCHSURCodes2013 <- read_xlsx("NCHSURCodes2013.xlsx")
#

##################################################### Figure 1 #####################################################

################## left - Stonecash replication & extension

ANES_prez <- ANES %>% filter(VCF0705 == "1. Democrat" | VCF0705 == "2. Republican" | 
                               VCF0705 == "3. Other (incl. 3d/minor party candidates and write-ins)") %>%
  mutate(prez_D = ifelse(VCF0705 == '1. Democrat', 1, 0),
         prez_R = ifelse(VCF0705=='2. Republican', 1, 0))

ANES_20_prez <- ANES_20 %>% filter(V202110x > 0) %>%
  mutate(prez_D = ifelse(V202110x == 1, 1, 0),
         prez_R = ifelse(V202110x == 2, 1, 0))

# prez vote by income groups
ANES_prez_incomes <- ANES_prez %>% 
  mutate(bottom_third = ifelse(VCF0114 == "1. 0 to 16 percentile" | VCF0114 == "2. 17 to 33 percentile", 1, 0),
         top_third = ifelse(VCF0114 == "4. 68 to 95 percentile" | VCF0114 == "5. 96 to 100 percentile", 1, 0))
ANES_prez_incomes_bottomthird_Ds <- ANES_prez_incomes %>% filter(bottom_third==1) %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(VCF0004) %>%
  summarise(bottom_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T))
ANES_prez_incomes_topthird_Ds <- ANES_prez_incomes %>% filter(top_third==1) %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(VCF0004) %>%
  summarise(top_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T))
ANES_prez_incomes_topthird_Ds <- ANES_prez_incomes_topthird_Ds %>% 
  select(top_third_choosingD_mean, top_third_choosingD_mean_se)

ANES_prez_bottom_top_combined <- cbind(ANES_prez_incomes_bottomthird_Ds, ANES_prez_incomes_topthird_Ds)
ANES_prez_bottom_top_combined <- ANES_prez_bottom_top_combined %>% filter(bottom_third_choosingD_mean>0)

# add 2020:
ANES_20_prez_incomes <- ANES_20_prez %>%
  mutate(top_third = ifelse(V201617x == 22 | V201617x == 21 | V201617x == 20 | V201617x == 19 | 
                              V201617x == 18 | V201617x == 17, 1, 0),
         bottom_third = ifelse(V201617x == 1 | V201617x == 2 | V201617x == 3 | V201617x == 4 | 
                                 V201617x == 5 | V201617x == 6 | V201617x == 7 | V201617x == 8, 1, 0))
ANES_20_prez_incomes_no0 <- ANES_20_prez_incomes %>% filter(V200010b>0)
ANES_20_prez_incomes_bottomthird_Ds <- ANES_20_prez_incomes_no0 %>% filter(bottom_third==1) %>%
  as_survey(weights = c(V200010b)) %>%
  summarise(bottom_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T))
ANES_20_prez_incomes_topthird_Ds <- ANES_20_prez_incomes_no0 %>% filter(top_third==1) %>%
  as_survey(weights = c(V200010b)) %>%
  summarise(top_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T))

ANES_20_prez_bottom_top_combined <- cbind(ANES_20_prez_incomes_bottomthird_Ds, ANES_20_prez_incomes_topthird_Ds)
ANES_20_prez_bottom_top_combined$VCF0004 <- 2020
ANES_20_prez_bottom_top_combined <- ANES_20_prez_bottom_top_combined %>% 
  select(VCF0004, bottom_third_choosingD_mean, bottom_third_choosingD_mean_se, top_third_choosingD_mean,
         top_third_choosingD_mean_se)

prez_bottomtop_all <- rbind(ANES_prez_bottom_top_combined, ANES_20_prez_bottom_top_combined[1,])

prez_bottomtop_all_diff <- prez_bottomtop_all %>% 
  mutate(diff_bottomtotop = bottom_third_choosingD_mean - top_third_choosingD_mean)


### SAME but just white voters

# prez vote
ANES_prez_white <- ANES_prez %>% filter(VCF0105a == "1. White non-Hispanic (1948-2012)")
ANES_prez_white_incomes <- ANES_prez_white %>% 
  mutate(bottom_third = ifelse(VCF0114 == "1. 0 to 16 percentile" | VCF0114 == "2. 17 to 33 percentile", 1, 0),
         top_third = ifelse(VCF0114 == "4. 68 to 95 percentile" | VCF0114 == "5. 96 to 100 percentile", 1, 0))
ANES_prez_white_incomes_bottomthird_Ds <- ANES_prez_white_incomes %>% filter(bottom_third==1) %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(VCF0004) %>%
  summarise(bottom_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T))
ANES_prez_white_incomes_topthird_Ds <- ANES_prez_white_incomes %>% filter(top_third==1) %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(VCF0004) %>%
  summarise(top_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T))
ANES_prez_white_incomes_topthird_Ds <- ANES_prez_white_incomes_topthird_Ds %>% 
  select(top_third_choosingD_mean, top_third_choosingD_mean_se)

ANES_prez_white_bottom_top_combined <- cbind(ANES_prez_white_incomes_bottomthird_Ds, ANES_prez_white_incomes_topthird_Ds)
ANES_prez_white_bottom_top_combined <- ANES_prez_white_bottom_top_combined %>% filter(bottom_third_choosingD_mean>0)

# add 2020:
ANES_20_prez_white <- ANES_20_prez %>% filter(V201565x == 1)
ANES_20_prez_white_incomes <- ANES_20_prez_white %>%
  mutate(top_third = ifelse(V201617x == 22 | V201617x == 21 | V201617x == 20 | V201617x == 19 | 
                              V201617x == 18 | V201617x == 17, 1, 0),
         bottom_third = ifelse(V201617x == 1 | V201617x == 2 | V201617x == 3 | V201617x == 4 | 
                                 V201617x == 5 | V201617x == 6 | V201617x == 7 | V201617x == 8, 1, 0))
ANES_20_prez_white_incomes_no0 <- ANES_20_prez_white_incomes %>% filter(V200010b>0)
ANES_20_prez_white_incomes_bottomthird_Ds <- ANES_20_prez_white_incomes_no0 %>% filter(bottom_third==1) %>%
  as_survey(weights = c(V200010b)) %>%
  summarise(bottom_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T))
ANES_20_prez_white_incomes_topthird_Ds <- ANES_20_prez_white_incomes_no0 %>% filter(top_third==1) %>%
  as_survey(weights = c(V200010b)) %>%
  summarise(top_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T))

ANES_20_prez_white_bottom_top_combined <- cbind(ANES_20_prez_white_incomes_bottomthird_Ds, ANES_20_prez_white_incomes_topthird_Ds)
ANES_20_prez_white_bottom_top_combined$VCF0004 <- 2020
ANES_20_prez_white_bottom_top_combined <- ANES_20_prez_white_bottom_top_combined %>% 
  select(VCF0004, bottom_third_choosingD_mean, bottom_third_choosingD_mean_se, top_third_choosingD_mean,
         top_third_choosingD_mean_se)

prez_bottomtop_all_white <- rbind(ANES_prez_white_bottom_top_combined, ANES_20_prez_white_bottom_top_combined[1,])

prez_bottomtop_all_diff_white <- prez_bottomtop_all_white %>% 
  mutate(diff_bottomtotop = bottom_third_choosingD_mean - top_third_choosingD_mean)

# plot BOTH whites and all:

prez_bottomtop_all_diff2 <- prez_bottomtop_all_diff %>% mutate(group="All")
prez_bottomtop_all_diff_white2 <- prez_bottomtop_all_diff_white %>% mutate(group="White")
prez_bottomtop_all_diff2_both <- rbind(prez_bottomtop_all_diff2, prez_bottomtop_all_diff_white2)

prez_bottomtop_all_diff_plotBOTH <- 
  ggplot(prez_bottomtop_all_diff2_both, mapping = aes(x = VCF0004, y = diff_bottomtotop, color = group)) + 
  geom_point(size = 1.0) +
  geom_line(prez_bottomtop_all_diff2_both, mapping = aes(x = VCF0004, y = diff_bottomtotop, color = group), alpha=0.5) +
  geom_smooth(method = "loess", se = TRUE) +
  ylab("Difference in % Pts (Bottom 1/3 - Top 1/3)") + xlab("Year") + 
  labs(title = "Democratic Presidential Voting: Low- v. High-Income",
       subtitle = "Dotted line: End of Stonecash analysis (ANES)") +
  theme(legend.position = "bottom") +
  scale_colour_manual(values = c("navy", "cornflowerblue")) +
  guides(color=guide_legend(title="Race/\nethnicity\ngroup")) +
  theme_bw() +
  geom_hline(yintercept = 0, color = "purple") +
  geom_vline(xintercept = 1996, color = "black", linetype = "dashed") +
  scale_y_continuous(breaks = seq(-0.1, 0.3, by = 0.05), labels = scales::percent_format(accuracy = 1), 
                     limits = c(-0.1, 0.3)) +
  scale_x_continuous(breaks = seq(1956, 2020, by = 4), limits = c(1952, 2020)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

ggsave(filename = "Stonecash_prez_bottomtop_all_diff_plotBOTH.jpg", plot = prez_bottomtop_all_diff_plotBOTH)
#

############################# right - Bartels replication & extension

ANES_prez_incomes <- ANES_prez %>% 
  mutate(bottom_third = ifelse(VCF0114 == "1. 0 to 16 percentile" | VCF0114 == "2. 17 to 33 percentile", 1, 0),
         top_third = ifelse(VCF0114 == "4. 68 to 95 percentile" | VCF0114 == "5. 96 to 100 percentile", 1, 0))
ANES_prez_incomes_bottomthird <- ANES_prez_incomes %>% filter(bottom_third==1) %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(VCF0004) %>%
  summarise(bottom_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T),
            bottom_third_choosingR_mean = survey_mean(prez_R==1, na.rm = T))
ANES_prez_incomes_topthird <- ANES_prez_incomes %>% filter(top_third==1) %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(VCF0004) %>%
  summarise(top_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T),
            top_third_choosingR_mean = survey_mean(prez_R==1, na.rm = T))
ANES_prez_incomes_topthird <- ANES_prez_incomes_topthird %>% select(top_third_choosingD_mean, top_third_choosingD_mean_se, top_third_choosingR_mean, top_third_choosingR_mean_se)

ANES_prez_incomes_combined <- cbind(ANES_prez_incomes_bottomthird, ANES_prez_incomes_topthird)

# add 2020:
ANES_20_prez_incomes <- ANES_20_prez %>%
  mutate(top_third = ifelse(V201617x == 22 | V201617x == 21 | V201617x == 20 | V201617x == 19 | 
                              V201617x == 18 | V201617x == 17, 1, 0),
         bottom_third = ifelse(V201617x == 1 | V201617x == 2 | V201617x == 3 | V201617x == 4 | 
                                 V201617x == 5 | V201617x == 6 | V201617x == 7 | V201617x == 8, 1, 0))
ANES_20_prez_incomes_no0 <- ANES_20_prez_incomes %>% filter(V200010b>0)
ANES_20_prez_incomes_bottomthird <- ANES_20_prez_incomes_no0 %>% filter(bottom_third==1) %>%
  as_survey(weights = c(V200010b)) %>%
  summarise(bottom_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T),
            bottom_third_choosingR_mean = survey_mean(prez_R==1, na.rm = T))
ANES_20_prez_incomes_topthird <- ANES_20_prez_incomes_no0 %>% filter(top_third==1) %>%
  as_survey(weights = c(V200010b)) %>%
  summarise(top_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T),
            top_third_choosingR_mean = survey_mean(prez_R==1, na.rm = T))

ANES_20_prez_bottom_top_combined <- cbind(ANES_20_prez_incomes_bottomthird, ANES_20_prez_incomes_topthird)
ANES_20_prez_bottom_top_combined$VCF0004 <- 2020
ANES_20_prez_bottom_top_combined <- ANES_20_prez_bottom_top_combined %>% 
  select(VCF0004, bottom_third_choosingD_mean, bottom_third_choosingD_mean_se, top_third_choosingD_mean,
         top_third_choosingD_mean_se, bottom_third_choosingR_mean, bottom_third_choosingR_mean_se, 
         top_third_choosingR_mean, top_third_choosingR_mean_se)

prez_bottomtop_all <- rbind(ANES_prez_incomes_combined, ANES_20_prez_bottom_top_combined)

prez_bottomtop_all_diffshare <- prez_bottomtop_all %>% 
  mutate(top_diff_DminusR = top_third_choosingD_mean - top_third_choosingR_mean,
         bottom_diff_DminusR = bottom_third_choosingD_mean - bottom_third_choosingR_mean)

prez_bottomtop_all_diffshare2 <- prez_bottomtop_all_diffshare %>% mutate(group="All")
prez_bottomtop_all_diff_white2 <- prez_bottomtop_all_diff_white %>% mutate(group="White")
prez_bottomtop_all_diff2_both <- rbind(prez_bottomtop_all_diff2, prez_bottomtop_all_diff_white2)


### same but just white voters
ANES_prez_white <- ANES_prez %>% filter(VCF0105a == "1. White non-Hispanic (1948-2012)")
ANES_20_prez_white <- ANES_20_prez %>% filter(V201565x == 1)

ANES_prez_white_incomes <- ANES_prez_white %>% 
  mutate(bottom_third = ifelse(VCF0114 == "1. 0 to 16 percentile" | VCF0114 == "2. 17 to 33 percentile", 1, 0),
         top_third = ifelse(VCF0114 == "4. 68 to 95 percentile" | VCF0114 == "5. 96 to 100 percentile", 1, 0))
ANES_prez_white_incomes_bottomthird <- ANES_prez_white_incomes %>% filter(bottom_third==1) %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(VCF0004) %>%
  summarise(bottom_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T),
            bottom_third_choosingR_mean = survey_mean(prez_R==1, na.rm = T))
ANES_prez_white_incomes_topthird <- ANES_prez_white_incomes %>% filter(top_third==1) %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(VCF0004) %>%
  summarise(top_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T),
            top_third_choosingR_mean = survey_mean(prez_R==1, na.rm = T))
ANES_prez_white_incomes_topthird <- ANES_prez_white_incomes_topthird %>% select(top_third_choosingD_mean, top_third_choosingD_mean_se, top_third_choosingR_mean, top_third_choosingR_mean_se)

ANES_prez_white_incomes_combined <- cbind(ANES_prez_white_incomes_bottomthird, ANES_prez_white_incomes_topthird)

# add 2020:
ANES_20_prez_white_incomes <- ANES_20_prez_white %>%
  mutate(top_third = ifelse(V201617x == 22 | V201617x == 21 | V201617x == 20 | V201617x == 19 | 
                              V201617x == 18 | V201617x == 17, 1, 0),
         bottom_third = ifelse(V201617x == 1 | V201617x == 2 | V201617x == 3 | V201617x == 4 | 
                                 V201617x == 5 | V201617x == 6 | V201617x == 7 | V201617x == 8, 1, 0))
ANES_20_prez_white_incomes_no0 <- ANES_20_prez_white_incomes %>% filter(V200010b>0)
ANES_20_prez_white_incomes_bottomthird <- ANES_20_prez_white_incomes_no0 %>% filter(bottom_third==1) %>%
  as_survey(weights = c(V200010b)) %>%
  summarise(bottom_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T),
            bottom_third_choosingR_mean = survey_mean(prez_R==1, na.rm = T))
ANES_20_prez_white_incomes_topthird <- ANES_20_prez_white_incomes_no0 %>% filter(top_third==1) %>%
  as_survey(weights = c(V200010b)) %>%
  summarise(top_third_choosingD_mean = survey_mean(prez_D==1, na.rm = T),
            top_third_choosingR_mean = survey_mean(prez_R==1, na.rm = T))

ANES_20_prez_white_bottom_top_combined <- cbind(ANES_20_prez_white_incomes_bottomthird, ANES_20_prez_white_incomes_topthird)
ANES_20_prez_white_bottom_top_combined$VCF0004 <- 2020
ANES_20_prez_white_bottom_top_combined <- ANES_20_prez_white_bottom_top_combined %>% 
  select(VCF0004, bottom_third_choosingD_mean, bottom_third_choosingD_mean_se, top_third_choosingD_mean,
         top_third_choosingD_mean_se, bottom_third_choosingR_mean, bottom_third_choosingR_mean_se, 
         top_third_choosingR_mean, top_third_choosingR_mean_se)

prez_white_bottomtop_all <- rbind(ANES_prez_white_incomes_combined, ANES_20_prez_white_bottom_top_combined)

prez_white_bottomtop_all_diffshare <- prez_white_bottomtop_all %>% 
  mutate(top_diff_DminusR = top_third_choosingD_mean - top_third_choosingR_mean,
         bottom_diff_DminusR = bottom_third_choosingD_mean - bottom_third_choosingR_mean)

# PLOT BOTH - whites and all races:

prez_bottomtop_all_diffshare2 <- prez_bottomtop_all_diffshare %>% mutate(group="All")
prez_white_bottomtop_all_diffshare2 <- prez_white_bottomtop_all_diffshare %>% mutate(group="White")
prez_bottomtop_all_diffshare2_both <- rbind(prez_bottomtop_all_diffshare2, prez_white_bottomtop_all_diffshare2)

prez_bottomtop_all_diffshare_plotBOTH <- 
  ggplot(prez_bottomtop_all_diffshare2_both, mapping = aes(x = VCF0004, y = top_third_choosingD_mean, color = group)) + 
  geom_point(size = 1.0) +
  geom_smooth(method = "loess", se = TRUE) +
  geom_line(prez_bottomtop_all_diffshare2_both, mapping = aes(x = VCF0004, y = top_third_choosingD_mean, color = group), 
            alpha = 0.5) +
  ylab("Democratic Share of Two-Party Vote") + xlab("Year") + 
  labs(title = "Democratic Presidential Vote, Top Third by Income",
       subtitle = "Dotted line: End of Bartels analysis (ANES)") +
  theme(legend.position = "bottom") +
  scale_color_manual(values = c("navy", "cornflowerblue")) +
  guides(color=guide_legend(title="Race/\nethnicity\ngroup")) +
  theme_bw() +
  geom_vline(xintercept = 2004, color = "black", linetype = "dashed") +
  geom_hline(yintercept = .5, color = "purple") +
  scale_y_continuous(breaks = seq(0, 1.0, by = 0.05), labels = scales::percent_format(accuracy = 1), 
                     limits = c(0.25, 0.65)) +
  scale_x_continuous(breaks = seq(1956, 2020, by = 4), limits = c(1952, 2020)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

ggsave(filename = "Bartels_prez_bottomtop_all_diffshare_plotBOTH.jpg", plot = prez_bottomtop_all_diffshare_plotBOTH)

#

#################################################### Figure 2 ####################################################

############################## left - ANES U-shape

ANES_prez_incomesALL <- ANES_prez %>% 
  mutate(income_cat = ifelse(VCF0114 == "1. 0 to 16 percentile" | VCF0114 == "2. 17 to 33 percentile", "0-33%",
                             ifelse(VCF0114 == "3. 34 to 67 percentile", "34-66%",
                                    ifelse(VCF0114 == "4. 68 to 95 percentile", "67-95%",
                                           ifelse(VCF0114 == "5. 96 to 100 percentile", "96-100%", NA)))))
ANES_prez_incomes_weightedprez <- ANES_prez_incomesALL %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(income_cat, VCF0004) %>%
  summarise(choosingD_mean = survey_mean(prez_D==1, na.rm = T),
            choosingR_mean = survey_mean(prez_R==1, na.rm = T))

# add 2020:
ANES_20_prez_incomesALL <- ANES_20_prez %>%
  mutate(income_cat = ifelse(V201617x < 9, "0-33%",
                             ifelse(V201617x > 8 & V201617x < 17, "34-66%",
                                    ifelse(V201617x > 16 & V201617x < 22, "67-95%",
                                           ifelse(V201617x==22, "96-100%", NA)))))

ANES_20_prez_incomesALL_no0 <- ANES_20_prez_incomesALL %>% filter(V200010b>0)
ANES_20_prez_incomes_weightedprez <- ANES_20_prez_incomesALL_no0 %>%
  as_survey(weights = c(V200010b)) %>% group_by(income_cat) %>%
  summarise(choosingD_mean = survey_mean(prez_D==1, na.rm = T),
            choosingR_mean = survey_mean(prez_R==1, na.rm = T))

ANES_20_prez_incomes_weightedprez$VCF0004 <- 2020
ANES_20_prez_incomes_weightedprez_select <- ANES_20_prez_incomes_weightedprez %>% 
  select(VCF0004, income_cat, choosingD_mean, choosingD_mean_se, choosingR_mean, choosingR_mean_se)

ANES_prez_incomes_weightedprez_w20 <- rbind(ANES_prez_incomes_weightedprez, ANES_20_prez_incomes_weightedprez_select) %>%
  filter(income_cat=="0-33%"|income_cat=="34-66%"|income_cat=="67-95%"|income_cat=="96-100%")

ANES_prez_incomes_weightedprez_w20_CALC <- ANES_prez_incomes_weightedprez_w20 %>%
  mutate(DoverR = choosingD_mean - choosingR_mean)

ANES_prez_incomes_weightedprez_w20_CALC_2020 <- ANES_prez_incomes_weightedprez_w20_CALC %>% filter(VCF0004==2020)
ANES_prez_incomes_weightedprez_w20_CALC_2012 <- ANES_prez_incomes_weightedprez_w20_CALC %>% filter(VCF0004==2012)
ANES_prez_incomes_weightedprez_w20_CALC_2000 <- ANES_prez_incomes_weightedprez_w20_CALC %>% filter(VCF0004==2000)
ANES_prez_incomes_weightedprez_w20_CALC_1992 <- ANES_prez_incomes_weightedprez_w20_CALC %>% filter(VCF0004==1992)
ANES_prez_incomes_weightedprez_w20_CALC_1980 <- ANES_prez_incomes_weightedprez_w20_CALC %>% filter(VCF0004==1980)

ANES_prez_incomes_weightedprez_w20_CALC_post1980 <- ANES_prez_incomes_weightedprez_w20_CALC %>% filter(VCF0004>=1980)

# plot

prez_Ushape_byyear <- 
  ggplot() + 
  geom_point(ANES_prez_incomes_weightedprez_w20_CALC_2020, mapping = aes(x = income_cat, y = DoverR), color = "black") +
  geom_line(ANES_prez_incomes_weightedprez_w20_CALC_2020, mapping = aes(x = income_cat, y = DoverR, group = VCF0004), 
            color = "black") +
  geom_point(ANES_prez_incomes_weightedprez_w20_CALC_2012, mapping = aes(x = income_cat, y = DoverR), color = "mediumblue") +
  geom_line(ANES_prez_incomes_weightedprez_w20_CALC_2012, mapping = aes(x = income_cat, y = DoverR, group = VCF0004), 
            color = "mediumblue") +
  geom_point(ANES_prez_incomes_weightedprez_w20_CALC_2000, mapping = aes(x = income_cat, y = DoverR), color = "dodgerblue") +
  geom_line(ANES_prez_incomes_weightedprez_w20_CALC_2000, mapping = aes(x = income_cat, y = DoverR, group = VCF0004), 
            color = "dodgerblue") +
  geom_point(ANES_prez_incomes_weightedprez_w20_CALC_1992, mapping = aes(x = income_cat, y = DoverR), 
             color = "steelblue") +
  geom_line(ANES_prez_incomes_weightedprez_w20_CALC_1992, mapping = aes(x = income_cat, y = DoverR, group = VCF0004), 
            color = "steelblue") +
  geom_point(ANES_prez_incomes_weightedprez_w20_CALC_1980, mapping = aes(x = income_cat, y = DoverR), 
             color = "azure4") +
  geom_line(ANES_prez_incomes_weightedprez_w20_CALC_1980, mapping = aes(x = income_cat, y = DoverR, group = VCF0004), 
            color = "azure4") +
  annotate("text", x = "96-100%", y = .22, label = "2020", size = 5.0, color = "black", fontface = "bold") +
  annotate("text", x = "96-100%", y = -.12, label = "2012", size = 5.0, color = "mediumblue", fontface = "bold") +
  annotate("text", x = "96-100%", y = -.4, label = "2000", size = 5.0, color = "dodgerblue", fontface = "bold") +
  annotate("text", x = "96-100%", y = -.02, label = "1992", size = 5.0, color = "steelblue", fontface = "bold") +
  annotate("text", x = "96-100%", y = -.57, label = "1980", size = 5.0, color = "azure4", fontface = "bold") +
  ylab("Dem. prez vote % - Repub. prez vote %") + xlab("Income group") + 
  labs(title = "Democratic Presidential Vote Margin by Income Groups",
       subtitle = "ANES (1980-2020); incl. 3rd party candidates") +
  theme(legend.position = "bottom") +
  theme_bw() +
  geom_hline(yintercept = 0, color = "purple") +
  scale_y_continuous(breaks = seq(-1.0, 1.0, by = 0.1), labels = scales::percent_format(accuracy = 1), 
                     limits = c(-0.65, 0.4)) 

ggsave(filename = "ANES_prez_Ushape_byyear.jpg", plot = prez_Ushape_byyear)

########################### right - CES U-shape

cces_new <- cces %>% mutate(Dprez = ifelse(voted_pres_party==1, 1, 0),
                            Rprez = ifelse(voted_pres_party==2, 1, 0),
                            gov_D = ifelse(voted_gov_party == 1, 1, 0),
                            gov_R = ifelse(voted_gov_party == 2, 1, 0),
                            rep_D = ifelse(voted_rep_party == 1, 1, 0),
                            rep_R = ifelse(voted_rep_party == 2, 1, 0))

cces_08ts <- cces_new %>% filter(year == 2008)
cces_12ts <- cces_new %>% filter(year == 2012)
cces_16ts <- cces_new %>% filter(year == 2016)
cces_20ts <- cces_new %>% filter(year == 2020)

### NEW INCOME CATEGORIES: https://dqydj.com/household-income-by-year/ 
# 2008: 0-25/30, -45/50, 75/80, 125
cces_08_prez_incomes <- cces_08ts %>% 
  mutate(income_cat = ifelse(faminc < 4, "0-20%",
                             ifelse(faminc ==4 | faminc == 5, "20-40%",
                                    ifelse(faminc > 5 & faminc < 9, "40-60%",
                                           ifelse(faminc==9 | faminc==10, "60-80%",
                                                  ifelse(faminc==11 | faminc==12, "80-100%", NA))))))

cces_08_prez_incomes_weight <- cces_08_prez_incomes %>%
  as_survey(weights = c(weight)) %>% group_by(income_cat) %>%
  summarise(choosingD_mean = survey_mean(Dprez==1, na.rm = T),
            choosingR_mean = survey_mean(Rprez==1, na.rm = T),
            Dgov_mean = survey_mean(gov_D==1, na.rm = T),
            Rgov_mean = survey_mean(gov_R==1, na.rm = T),
            Dhouse_mean = survey_mean(rep_D==1, na.rm = T),
            Rhouse_mean = survey_mean(rep_R==1, na.rm = T)) %>% mutate(year=2008)
# 2012: 0-20/25, -40/45, 70/75, 117
cces_12_prez_incomes <- cces_12ts %>% 
  mutate(income_cat = ifelse(faminc < 3, "0-20%",
                             ifelse(faminc==3 | faminc ==4, "20-40%",
                                    ifelse(faminc > 4 & faminc < 8, "40-60%",
                                           ifelse(faminc==8 |faminc==9 | faminc==10, "60-80%",
                                                  ifelse(faminc==11 | faminc==12, "80-100%", NA))))))

cces_12_prez_incomes_weight <- cces_12_prez_incomes %>%
  as_survey(weights = c(weight)) %>% group_by(income_cat) %>%
  summarise(choosingD_mean = survey_mean(Dprez==1, na.rm = T),
            choosingR_mean = survey_mean(Rprez==1, na.rm = T),
            Dgov_mean = survey_mean(gov_D==1, na.rm = T),
            Rgov_mean = survey_mean(gov_R==1, na.rm = T),
            Dhouse_mean = survey_mean(rep_D==1, na.rm = T),
            Rhouse_mean = survey_mean(rep_R==1, na.rm = T)) %>% mutate(year=2012)
# 2016: 0-25/30, -45/50, 75-80, 127
cces_16_prez_incomes <- cces_16ts %>% 
  mutate(income_cat = ifelse(faminc < 4, "0-20%",
                             ifelse(faminc ==4 | faminc == 5, "20-40%",
                                    ifelse(faminc > 5 & faminc < 9, "40-60%",
                                           ifelse(faminc==9 | faminc==10, "60-80%",
                                                  ifelse(faminc==11 | faminc==12, "80-100%", NA))))))

cces_16_prez_incomes_weight <- cces_16_prez_incomes %>%
  as_survey(weights = c(weight)) %>% group_by(income_cat) %>%
  summarise(choosingD_mean = survey_mean(Dprez==1, na.rm = T),
            choosingR_mean = survey_mean(Rprez==1, na.rm = T),
            Dgov_mean = survey_mean(gov_D==1, na.rm = T),
            Rgov_mean = survey_mean(gov_R==1, na.rm = T),
            Dhouse_mean = survey_mean(rep_D==1, na.rm = T),
            Rhouse_mean = survey_mean(rep_R==1, na.rm = T)) %>% mutate(year=2016)
# 2020: 0-30, -50/55, 85-90, 140-145
cces_20_prez_incomes <- cces_20ts %>% 
  mutate(income_cat = ifelse(faminc < 4, "0-20%",
                             ifelse(faminc ==4 | faminc == 5, "20-40%",
                                    ifelse(faminc > 5 & faminc < 9, "40-60%",
                                           ifelse(faminc==9 | faminc==10, "60-80%",
                                                  ifelse(faminc==11 | faminc==12, "80-100%", NA))))))

cces_20_prez_incomes_weight <- cces_20_prez_incomes %>%
  as_survey(weights = c(weight)) %>% group_by(income_cat) %>%
  summarise(choosingD_mean = survey_mean(Dprez==1, na.rm = T),
            choosingR_mean = survey_mean(Rprez==1, na.rm = T),
            Dgov_mean = survey_mean(gov_D==1, na.rm = T),
            Rgov_mean = survey_mean(gov_R==1, na.rm = T),
            Dhouse_mean = survey_mean(rep_D==1, na.rm = T),
            Rhouse_mean = survey_mean(rep_R==1, na.rm = T)) %>% mutate(year=2020)

# BIND!
CCES_80121620_weightprez <- rbind(cces_08_prez_incomes_weight, cces_12_prez_incomes_weight, 
                                  cces_16_prez_incomes_weight, cces_20_prez_incomes_weight) %>%
  filter(income_cat=="0-20%"|income_cat=="20-40%"|income_cat=="40-60%"|income_cat=="60-80%"|income_cat=="80-100%")

CCES_80121620_weightprez_CALC <- CCES_80121620_weightprez %>%
  mutate(DoverR = choosingD_mean - choosingR_mean,
         DoverRguv = Dgov_mean - Rgov_mean,
         DoverRhouse = Dhouse_mean - Rhouse_mean)

CCES_80121620_weightprez_CALC_08 <- CCES_80121620_weightprez_CALC %>% filter(year==2008)
CCES_80121620_weightprez_CALC_12 <- CCES_80121620_weightprez_CALC %>% filter(year==2012)
CCES_80121620_weightprez_CALC_16 <- CCES_80121620_weightprez_CALC %>% filter(year==2016)
CCES_80121620_weightprez_CALC_20 <- CCES_80121620_weightprez_CALC %>% filter(year==2020)

# PLOT
prez_Ushape_byyearCCES <- 
  ggplot() + 
  geom_point(CCES_80121620_weightprez_CALC_20, mapping = aes(x = income_cat, y = DoverR), color = "black") +
  geom_line(CCES_80121620_weightprez_CALC_20, mapping = aes(x = income_cat, y = DoverR, group = year), 
            color = "black") +
  geom_point(CCES_80121620_weightprez_CALC_16, mapping = aes(x = income_cat, y = DoverR), color = "purple3") +
  geom_line(CCES_80121620_weightprez_CALC_16, mapping = aes(x = income_cat, y = DoverR, group = year), 
            color = "purple3") +
  geom_point(CCES_80121620_weightprez_CALC_12, mapping = aes(x = income_cat, y = DoverR), color = "hotpink4") +
  geom_line(CCES_80121620_weightprez_CALC_12, mapping = aes(x = income_cat, y = DoverR, group = year), 
            color = "hotpink4") +
  geom_point(CCES_80121620_weightprez_CALC_08, mapping = aes(x = income_cat, y = DoverR), 
             color = "magenta3") +
  geom_line(CCES_80121620_weightprez_CALC_08, mapping = aes(x = income_cat, y = DoverR, group = year), 
            color = "magenta3") +
  annotate("text", x = "80-100%", y = .15, label = "2020", size = 4.5, color = "black", fontface = "bold") +
  annotate("text", x = "80-100%", y = .115, label = "2016", size = 4.5, color = "purple3", fontface = "bold") +
  annotate("text", x = "80-100%", y = .065, label = "2012", size = 4.5, color = "hotpink4", fontface = "bold") +
  annotate("text", x = "80-100%", y = .01, label = "2008", size = 4.5, color = "magenta3", fontface = "bold") +
  ylab("Dem. prez vote % - Repub. prez vote %") + xlab("Income group") + 
  labs(title = "Democratic Presidential Vote Margin by Income Groups",
       subtitle = "CES (2008-2020)") +
  theme(legend.position = "bottom") +
  theme_bw() +
  geom_hline(yintercept = 0, color = "purple") +
  scale_y_continuous(breaks = seq(-1.0, 1.0, by = 0.05), labels = scales::percent_format(accuracy = 1), 
                     limits = c(-0.05, 0.3)) 

ggsave(filename = "prez_Ushape_byyearCCES.jpg", plot = prez_Ushape_byyearCCES)

# 

#################################################### Figure 3 ####################################################

############################ left - Top 5%, 1%, stock owners

### 2012
cces_12_selected <- cces_12 %>% select(V101, V103, faminc, educ, inputzip, inputstate, pid7, CC410a, countyfips)
colnames(cces_12_selected) <- c("case_id", "weight", "fam_income", "educ", "zip", "state", "pid", "prez_vote", 
                                "county_fips")
cces_12_selected2 <- cces_12_selected %>% mutate(prez_vote2 = ifelse(prez_vote==1, 2, 
                                                                     ifelse(prez_vote==2, 1, 
                                                                            ifelse(prez_vote==3, 3, NA))),
                                                 year = 2012)
cces_12_selected2_select <- cces_12_selected2 %>% select(weight, fam_income, pid, prez_vote2, year)
cces_12_selected2_select <- as.matrix(cces_12_selected2_select)

### 2016
cces_16new <- cces_16 %>% mutate(vote_2016 = ifelse(CC16_410a==1, 2, 
                                                    ifelse(CC16_410a==2, 1, 
                                                           ifelse(CC16_410a==3, 3, NA))))
cces_16_selected <- cces_16new %>% select(V101, commonweight, faminc, educ, lookupzip, inputstate, pid7, 
                                          vote_2016, countyfips)
colnames(cces_16_selected) <- c("case_id", "weight", "fam_income", "educ", "zip", "state", "pid", "prez_vote", "county_fips")
cces_16_selected2 <- cces_16_selected %>% mutate(prez_vote2 = ifelse(prez_vote==1, 2, # reverse again! see above.
                                                                     ifelse(prez_vote==2, 1, 
                                                                            ifelse(prez_vote==3, 3, NA))),
                                                 year = 2016)
cces_16_selected2_select <- cces_16_selected2 %>% select(weight, fam_income, pid, prez_vote2, year)
cces_16_selected2_select <- as.matrix(cces_16_selected2_select)

# 20
cces_20new <- cces_20 %>% select(commonweight, faminc_new, pid7, CC20_410) # prez 1 = Biden, 2 = Trump
cces_20new2 <- cces_20new %>% mutate(prez_vote2 = ifelse(CC20_410==1, 2, 
                                                         ifelse(CC20_410==2, 1, 
                                                                ifelse(CC20_410==3, 3, NA))),
                                     year = 2020)
cces_20new2 <- cces_20new2 %>% select(commonweight, faminc_new, pid7, prez_vote2, year)
colnames(cces_20new2) <- c("weight", "fam_income", "pid", "prez_vote", "year") # now prez 1 = R, 2 = D
cces_20new2 <- as.matrix(cces_20new2)

# BIND! 
cces_121620 <- rbind(cces_12_selected2_select, cces_16_selected2_select, cces_20new2) # prez 1 = R, 2 = D for all
cces_121620 <- as.data.frame(cces_121620)

cces_121620_prez <- cces_121620 %>% mutate(
  prez_D = ifelse(prez_vote2 == 2, 1, 0),
  prez_R = ifelse(prez_vote2 == 1, 1, 0))

# 500k+
cces_121620_500kplus_prez <- cces_121620_prez %>% filter(fam_income == 16) %>% 
  as_survey(weights = c(weight)) %>% group_by(year) %>% 
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T)) %>%
  mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

cces_121620_prez %>% filter(fam_income == 16) %>% nrow() # 590

# 200k+
cces_121620_200kplus_prez <- cces_121620_prez %>% filter(fam_income>12 & fam_income<17) %>% 
  as_survey(weights = c(weight)) %>% group_by(year) %>% 
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T)) %>%
  mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

cces_121620_prez %>% filter(fam_income>12 & fam_income<17) %>% nrow() # 4,997

# stock owners -- DIFF DATA FRAME: cces_1020_stock_prez & cces_1020_stock_prez150k

# 2010
cces_10_stock <- cces_10 %>% select(V101, V266, V212d, CC317, V246)
cces_10_stock <- cces_10_stock %>% mutate(prez_vote = ifelse(CC317==2, 1,
                                                             ifelse(CC317==1, 2, 
                                                                    ifelse(CC317==3, 3, NA))),
                                          year = 2010)
cces_10_stock <- cces_10_stock %>% select(V101, V266, V212d, V246, prez_vote, year)
colnames(cces_10_stock) <- c("weight", "stock", "pid", "income", "prez_vote", "year")
cces_10_stock <- as.data.frame(cces_10_stock)

# 2012
cces_12_stock <- cces_12 %>% select(V103, investor, pid7, faminc, CC410a)
cces_12_stock <- cces_12_stock %>% mutate(prez_vote = ifelse(CC410a==2, 1,
                                                             ifelse(CC410a==1, 2, 
                                                                    ifelse(CC410a==3, 3, NA))),
                                          year = 2012)
cces_12_stock <- cces_12_stock %>% select(V103, investor, pid7, faminc, prez_vote, year)
colnames(cces_12_stock) <- c("weight", "stock", "pid", "income","prez_vote", "year")
cces_12_stock <- as.data.frame(cces_12_stock)

# 2014
cces_14_stock <- cces_14 %>% select(weight, investor, pid7, faminc, CC14_317)
cces_14_stock <- cces_14_stock %>% mutate(prez_vote = ifelse(CC14_317==2, 1,
                                                             ifelse(CC14_317==1, 2, 
                                                                    ifelse(CC14_317==3, 3, NA))),
                                          year = 2014)
cces_14_stock <- cces_14_stock %>% select(weight, investor, pid7, faminc, prez_vote, year)
colnames(cces_14_stock) <- c("weight", "stock", "pid", "income","prez_vote", "year")
cces_14_stock <- as.data.frame(cces_14_stock)

# 2016
cces_16_stock <- cces_16 %>% select(commonweight, investor, pid7, faminc, CC16_410a) %>% mutate(year = 2016)
colnames(cces_16_stock) <- c("weight", "stock", "pid","income", "prez_vote", "year")
cces_16_stock <- as.data.frame(cces_16_stock)

# 2018
cces_18_stock <- cces_18 %>% select(commonweight, investor, pid7, faminc_new, CC18_317) %>% mutate(year = 2018)
colnames(cces_18_stock) <- c("weight", "stock", "pid", "income", "prez_vote", "year")
cces_18_stock <- as.data.frame(cces_18_stock)

# 2020:
cces_20_stock <- cces_20 %>% select(commonweight, investor, pid7, faminc_new, CC20_410)
cces_20_stock <- cces_20_stock %>% mutate(prez_vote = ifelse(CC20_410==2, 1,
                                                             ifelse(CC20_410==1, 2, NA)),
                                          year = 2020)
cces_20_stock <- cces_20_stock %>% select(commonweight, investor, pid7, faminc_new, prez_vote, year)
colnames(cces_20_stock) <- c("weight", "stock", "pid", "income", "prez_vote", "year")

# STACK!
cces_1020_stock <- bind_rows(cces_10_stock, cces_12_stock, cces_14_stock, 
                             cces_16_stock, cces_18_stock, cces_20_stock)
nrow(cces_1020_stock) # 351k
cces_1020_stock <- as.data.frame(cces_1020_stock)

# cces_1020_stock
cces_1020_stock <- cces_1020_stock %>% 
  mutate(prez_D = ifelse(prez_vote==2, 1, 0),
         prez_R = ifelse(prez_vote==1, 1, 0))
cces_1020_stock <- as.data.frame(cces_1020_stock)

cces_1020_stock_prez <- cces_1020_stock %>% 
  filter(stock == 1) %>% as_survey(weights = c(weight))  %>% group_by(year) %>% 
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T)) %>%
  filter(mean_prez_R > 0 | mean_prez_D > 0) %>%
  mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

cces_1020_stock_prez150k <- cces_1020_stock %>% 
  filter(stock == 1 & income>11 & income<32) %>% as_survey(weights = c(weight))  %>% group_by(year) %>% 
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T)) %>%
  filter(mean_prez_R > 0 | mean_prez_D > 0) %>%
  mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

# PLOT!
prez_500k200kstock150k <- 
  ggplot() + 
  geom_point(cces_121620_500kplus_prez, mapping = aes(x = year, y = prezDshare), color = "darkgreen") +
  geom_line(cces_121620_500kplus_prez, mapping = aes(x = year, y = prezDshare), color = "darkgreen") +
  geom_point(cces_121620_200kplus_prez, mapping = aes(x = year, y = prezDshare), color = "chartreuse3") +
  geom_line(cces_121620_200kplus_prez, mapping = aes(x = year, y = prezDshare), color = "chartreuse3") +
  geom_point(cces_1020_stock_prez, mapping = aes(x = year, y = prezDshare), color = "deepskyblue2") +
  geom_line(cces_1020_stock_prez, mapping = aes(x = year, y = prezDshare), color = "deepskyblue2") +
  geom_point(cces_1020_stock_prez150k, mapping = aes(x = year, y = prezDshare), color = "blue2") +
  geom_line(cces_1020_stock_prez150k, mapping = aes(x = year, y = prezDshare), color = "blue2") +
  ylab("Dem. prez vote % - Repub. prez vote %") + xlab("Year") + 
  labs(title = "Democratic Presidential Vote Margin",
       subtitle = "By Top Incomes & Stock Ownership (CES 2010-2020)") +
  theme(legend.position = "bottom") +
  theme_bw() +
  geom_hline(yintercept = 0.5, color = "purple") +
  scale_y_continuous(breaks = seq(0, 1.0, by = 0.05), labels = scales::percent_format(accuracy = 1), 
                     limits = c(0.42, 0.68)) +
  scale_x_continuous(breaks = seq(2010, 2020, by = 2)) +
  annotate("text", x = 2012, y = 0.64, label = "$500,000+\n(Top 1%)", size = 4.5, color = "darkgreen", fontface = "bold") +
  annotate("text", x = 2019, y = 0.64, label = "$200,000+\n(Top 5-10%)", size = 4.5, color = "chartreuse3", fontface="bold")+
  annotate("text", x = 2016, y = 0.47, label = "All\nstock\nowners", size = 4, color = "deepskyblue2", fontface = "bold") +
  annotate("text", x = 2013.9, y = 0.55, label = "$150,000+\n(Top 10-15%)\nstock\nowners", size = 4, color = "blue2", 
           fontface = "bold") 

ggsave(filename = "prez_500k200kstock150k.jpg", plot = prez_500k200kstock150k)

############################ right - GSS top incomes by occupation

gss_PIDprez <- gss %>% mutate(pid7 = ifelse(partyid==0|partyid==1|partyid==2, 1,
                                            ifelse(partyid==3, 2, 
                                                   ifelse(partyid==4|partyid==5|partyid==6, 3, NA))),
                              pid_D = ifelse(partyid==0|partyid==1|partyid==2, 1, 0),
                              pid_R = ifelse(partyid==4|partyid==5|partyid==6, 1, 0),
                              prez_any = ifelse(!is.na(pres68), pres68,
                                                ifelse(!is.na(pres72), pres72,
                                                       ifelse(!is.na(pres76), pres76,
                                                              ifelse(!is.na(pres80), pres80,
                                                                     ifelse(!is.na(pres84), pres84,
                                                                            ifelse(!is.na(pres88), pres88,
                                                                                   ifelse(!is.na(pres92), pres92,
                                                                                          ifelse(!is.na(pres96), pres96,
                                                                                                 ifelse(!is.na(pres00), pres00,
                                                                                                        ifelse(!is.na(pres04), pres04,
                                                                                                               ifelse(!is.na(pres08), pres08,
                                                                                                                      ifelse(!is.na(pres12), pres12,
                                                                                                                             ifelse(!is.na(pres16), pres16, NA))))))))))))),
                              prez_D = ifelse(prez_any==1, 1, 0),
                              prez_R = ifelse(prez_any==2, 1, 0))

# GOAL: CEO/managers, biz/finance, professionals, service, blue collar
# https://www2.census.gov/programs-surveys/cps/methodology/Occupation%20Codes.pdf 
gss_occupcats <- gss_PIDprez %>% filter(year==2016|year==2012|year==2008|year==2004|year==2000|year==1996|year==1992|
                                          year==1988|year==1984|year==1980|year==1976|year==1972) %>% 
  mutate(occ_new = ifelse(occ10 < 441 | occ10==4000, "CEO/manager", 
                          ifelse(occ10 > 499 & occ10 < 961, "business/finance",
                                 ifelse((occ10 > 999 & occ10 < 1981)|(occ10 > 2099 & occ10 < 2181)|
                                          (occ10>2999 & occ10<3551), "professional/scientific",
                                        ifelse((occ10 > 2000 & occ10 < 2161)|(occ10>2204 & occ10<2971)|
                                                 (occ10>3600 & occ10<3961)|(occ10>4329 & occ10<4656),"human services/arts",
                                               ifelse((occ10>4001 & occ10<4256)|(occ10>6004 & occ10<9761), "blue collar", 
                                                      ifelse(occ10>4678 & occ10<5941, "admin/other service", NA)))))))

gss_occup_CEOmanager_prez <- gss_occupcats %>% filter(occ_new == "CEO/manager") %>% as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_CEOmanager_prez <- gss_occup_CEOmanager_prez %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_bizfinance_prez <- gss_occupcats %>% filter(occ_new == "business/finance") %>% as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_bizfinance_prez <- gss_occup_bizfinance_prez %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_profsci_prez <- gss_occupcats%>%filter(occ_new == "professional/scientific")%>%as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_profsci_prez <- gss_occup_profsci_prez %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_humanarts_prez <- gss_occupcats %>% filter(occ_new == "human services/arts") %>% as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_humanarts_prez <- gss_occup_humanarts_prez %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_bluecollar_prez <- gss_occupcats %>% filter(occ_new == "blue collar") %>% as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_bluecollar_prez <- gss_occup_bluecollar_prez %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_adminservice_prez <- gss_occupcats %>% filter(occ_new == "admin/other service")%>%as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_adminservice_prez <- gss_occup_adminservice_prez %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

# with top income categories
with(gss, table(year, income16)) # 16-18-21
with(gss, table(year, income06)) # 06-8-10-12-14
with(gss, table(year, income98)) # 98-00-02-04
with(gss, table(year, income91)) # 91-93-94-96
with(gss, table(year, income86)) # 86-90
with(gss, table(year, income82)) # 82-85
with(gss, table(year, income77)) # 77-80
with(gss, table(year, income72)) # 72

gss_occup_CEOmanager_prezINC <- gss_occupcats %>% filter(occ_new == "CEO/manager" & 
                                                           ((income16>22&income16<27)|(income06>21&income06<26)|
                                                              (income98>20&income98<24)|(income91>18&income91<22)|
                                                              (income86>17&income86<21)|(income82==16|income82==17)|
                                                              (income77>13&income77<17)|(income72>9&income72<13))) %>% 
  as_survey(weights = c(wtssall)) %>% group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_CEOmanager_prezINC <- gss_occup_CEOmanager_prezINC %>% 
  mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_bizfinance_prezINC <- gss_occupcats %>% filter(occ_new == "business/finance" & 
                                                           ((income16>22&income16<27)|(income06>21&income06<26)|
                                                              (income98>20&income98<24)|(income91>18&income91<22)|
                                                              (income86>17&income86<21)|(income82==16|income82==17)|
                                                              (income77>13&income77<17)|(income72>9&income72<13))) %>% 
  as_survey(weights = c(wtssall)) %>%group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_bizfinance_prezINC <- gss_occup_bizfinance_prezINC %>% 
  mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_profsci_prezINC <- gss_occupcats%>%filter(occ_new == "professional/scientific" & 
                                                      ((income16>22&income16<27)|(income06>21&income06<26)|
                                                         (income98>20&income98<24)|(income91>18&income91<22)|
                                                         (income86>17&income86<21)|(income82==16|income82==17)|
                                                         (income77>13&income77<17)|(income72>9&income72<13)))%>%
  as_survey(weights = c(wtssall)) %>%group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_profsci_prezINC <- gss_occup_profsci_prezINC %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_humanarts_prezINC <- gss_occupcats %>% filter(occ_new == "human services/arts" & 
                                                          ((income16>22&income16<27)|(income06>21&income06<26)|
                                                             (income98>20&income98<24)|(income91>18&income91<22)|
                                                             (income86>17&income86<21)|(income82==16|income82==17)|
                                                             (income77>13&income77<17)|(income72>9&income72<13))) %>% 
  as_survey(weights = c(wtssall)) %>% group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_humanarts_prezINC <- gss_occup_humanarts_prezINC %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_bluecollar_prezINC <- gss_occupcats %>% filter(occ_new == "blue collar" & 
                                                           ((income16>22&income16<27)|(income06>21&income06<26)|
                                                              (income98>20&income98<24)|(income91>18&income91<22)|
                                                              (income86>17&income86<21)|(income82==16|income82==17)|
                                                              (income77>13&income77<17)|(income72>9&income72<13))) %>% as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_bluecollar_prezINC <- gss_occup_bluecollar_prezINC %>%
  mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_adminservice_prezINC <- gss_occupcats %>% filter(occ_new == "admin/other service" & 
                                                             ((income16>22&income16<27)|(income06>21&income06<26)|
                                                                (income98>20&income98<24)|(income91>18&income91<22)|
                                                                (income86>17&income86<21)|(income82==16|income82==17)|
                                                                (income77>13&income77<17)|(income72>9&income72<13)))%>%
  as_survey(weights = c(wtssall)) %>%group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_adminservice_prezINC <- gss_occup_adminservice_prezINC %>% 
  mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_CEOmanager_prezINC2 <- gss_occup_CEOmanager_prezINC %>% mutate(group="CEOmanager")
gss_occup_bizfinance_prezINC2 <- gss_occup_bizfinance_prezINC %>% mutate(group="bizfinance")
gss_occup_profsci_prezINC2 <- gss_occup_profsci_prezINC %>% mutate(group="profsci")
gss_occup_humanarts_prezINC2 <- gss_occup_humanarts_prezINC %>% mutate(group="humanarts")
gss_occup_bluecollar_prezINC2 <- gss_occup_bluecollar_prezINC %>% mutate(group="bluecollar")
gss_occup_adminservice_prezINC2 <- gss_occup_adminservice_prezINC %>% mutate(group="adminservice")
gss_occup_prez_ALL <- rbind(gss_occup_CEOmanager_prezINC2, gss_occup_bizfinance_prezINC2,
                            gss_occup_profsci_prezINC2, gss_occup_humanarts_prezINC2,
                            gss_occup_bluecollar_prezINC2, gss_occup_adminservice_prezINC2)

plot_GSSbyoccupation_prezINC_2gether <- 
  ggplot(data = gss_occup_prez_ALL, mapping = aes(year, prezDshare, color = group)) +
  geom_line(data = gss_occup_prez_ALL, mapping = aes(year, prezDshare, color = group), alpha = 0.4) +
  geom_point(size = 1.0, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE) +
  ylab("Democratic Share of 2-Party Vote") + xlab("Year") +
  labs(title = "Democratic Presidential Voting By Occupation Type",
       subtitle = "ONLY Top 25% by Income (GSS, 1972-2016)") +
  theme(legend.position = "bottom") +
  theme_bw() +
  geom_hline(yintercept = 0.5, color = "purple") +
  scale_y_continuous(breaks = seq(0, 0.9, by = 0.05), labels = scales::percent_format(accuracy = 1), 
                     limits = c(0.15, 0.8)) +
  scale_x_continuous(breaks = seq(1972, 2016, by = 4), limits = c(1972, 2016)) +
  scale_color_manual(name = "Occupation\ncategory",
                     values = c("green", "gold", "purple", "black", "deeppink1", "turquoise2"),
                     labels = c("Admin/other service", "Business/finance","Blue collar",
                                "CEO/manager","Human services/arts","Professional/scientific")) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  theme(legend.position = "bottom") +
  guides(color=guide_legend(nrow=3, byrow=TRUE))

ggsave(filename = "plot_GSSbyoccupation_prezINC_2gether.jpg", plot = plot_GSSbyoccupation_prezINC_2gether)

#

#################################################### Figure 4 ####################################################

############################ upper left - $150k+ by Education

cces_DVs <- cces %>% mutate(
  PID_D = ifelse(pid7 == 1 | pid7 == 2 | pid7 == 3, 1, 0),
  PID_R = ifelse(pid7 == 5 | pid7 == 6 | pid7 == 7, 1, 0),
  prez_D = ifelse(voted_pres_party == 1, 1, 0),
  prez_R = ifelse(voted_pres_party == 2, 1, 0),
  educ_new = ifelse(educ<5, 1, 
                    ifelse(educ==5, 2, 
                           ifelse(educ==6, 3, NA)))
)

DsEd_150kplus <- cces_DVs %>%
  filter(faminc == 12) %>% as_survey(weights = c(weight)) %>% group_by(educ_new, year) %>% 
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T),
            mean_PID_D = survey_mean(PID_D, na.rm = T), mean_PID_R = survey_mean(PID_R, na.rm = T))

DsEd_150kplus <- DsEd_150kplus %>% 
  filter(year == 2008 | year == 2010 | year == 2012 | year == 2014 |year == 2016 | year == 2018 | year == 2020)

DsEd_150kplus_shares2008 <- cces_DVs %>% filter(faminc == 12 & year == 2008) %>% 
  as_survey(weights = c(weight)) %>% group_by(educ_new) %>%
  survey_tally()
sum(DsEd_150kplus_shares2008$n) # 1349
DsEd_150kplus_shares2008[1,2]/sum(DsEd_150kplus_shares2008$n) # 41.4% # LESS THAN COLLEG
DsEd_150kplus_shares2008[2,2]/sum(DsEd_150kplus_shares2008$n) # 30.8%
DsEd_150kplus_shares2008[3,2]/sum(DsEd_150kplus_shares2008$n) # 27.8% # sum = 100.0
41.4+30.8+27.8

DsEd_150kplus_shares2020 <- cces_DVs %>% filter(faminc == 12 & year == 2020) %>% 
  as_survey(weights = c(weight)) %>% group_by(educ_new) %>%
  survey_tally()
sum(DsEd_150kplus_shares2020$n) # 1349
DsEd_150kplus_shares2020[1,2]/sum(DsEd_150kplus_shares2020$n) # 31.1%
DsEd_150kplus_shares2020[2,2]/sum(DsEd_150kplus_shares2020$n) # 33.4%
DsEd_150kplus_shares2020[3,2]/sum(DsEd_150kplus_shares2020$n) # 35.5% # sum = 100.0
31.1+33.4+35.5

# plot
plot_DsbyEd_150kplus <- 
  ggplot() +
  geom_line(data = DsEd_150kplus, mapping = aes(x = year, y = mean_prez_D, color = as.factor(educ_new))) +
  geom_point(data = DsEd_150kplus, mapping = aes(x = year, y = mean_prez_D, color = as.factor(educ_new))) +
  scale_color_brewer(palette="Set1") +
  ylab("Share voting for Dem. candidate") + xlab("Year") +
  labs(title = "$150,000+ Family Income: Dem. Prez Voting by Education",
       subtitle = "%s on chart = Group's share of total in 2008 or 2020 (CCES, 2008-20)") +
  theme(legend.position = "bottom") +
  theme_bw() +
  geom_hline(yintercept = 0.5, color = "purple") +
  scale_y_continuous(breaks = seq(0, 1.0, by = 0.05), labels = scales::percent_format(accuracy=1), 
                     limits = c(0.3, 0.7)) +
  scale_x_continuous(breaks = seq(2006, 2020, by = 2), limits = c(2008, 2020)) +
  scale_color_discrete(name="Education level", labels=c("Less-than-college", "College", 
                                                        "Post-grad")) +
  annotate("text", x = 2008.4, y = .34, label = "41.4%", size = 4.5, color = "#F8766D", fontface = "bold") +
  annotate("text", x = 2008.4, y = .40, label = "30.8%", size = 4.5, color = "#00B81F", fontface = "bold") +
  annotate("text", x = 2008.4, y = .47, label = "27.8%", size = 4.5, color = "#00A5FF", fontface = "bold") +
  annotate("text", x = 2019.6, y = .47, label = "31.1%", size = 4.5, color = "#F8766D", fontface = "bold") +
  annotate("text", x = 2019.6, y = .60, label = "33.4%", size = 4.5, color = "#00B81F", fontface = "bold") +
  annotate("text", x = 2019.6, y = .69, label = "35.5%", size = 4.5, color = "#00A5FF", fontface = "bold") 

ggsave("plot_DsbyEd_150kplus.jpg", plot_DsbyEd_150kplus)

############################ upper right - $150k+ by Race/ethnicity

cces_DVs_race <- cces_DVs %>% filter(race<5)

DsRace_150kplus <- cces_DVs_race %>%
  filter(faminc == 12) %>% as_survey(weights = c(weight)) %>% group_by(race, year) %>% 
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T),
            mean_PID_D = survey_mean(PID_D, na.rm = T), mean_PID_R = survey_mean(PID_R, na.rm = T))

DsRace_150kplus <- DsRace_150kplus %>% 
  filter(year == 2008 | year == 2010 | year == 2012 | year == 2014 |year == 2016 | year == 2018 | year == 2020)

DsRace_150kplus_shares2008 <- cces_DVs %>% filter(race < 5 & faminc == 12 & year == 2008) %>% 
  as_survey(weights = c(weight)) %>% group_by(race) %>%
  survey_tally()
sum(DsRace_150kplus_shares2008$n) # 
DsRace_150kplus_shares2008[1,2]/sum(DsRace_150kplus_shares2008$n) # 85.8% # WHITE
DsRace_150kplus_shares2008[2,2]/sum(DsRace_150kplus_shares2008$n) # 6.4%
DsRace_150kplus_shares2008[3,2]/sum(DsRace_150kplus_shares2008$n) # 5.1% 
DsRace_150kplus_shares2008[4,2]/sum(DsRace_150kplus_shares2008$n) # 2.7%  # sum = 100.0
85.8 + 6.4 + 5.1 + 2.7

DsRace_150kplus_shares2020 <- cces_DVs %>% filter(race < 5 & faminc == 12 & year == 2020) %>% 
  as_survey(weights = c(weight)) %>% group_by(race) %>%
  survey_tally()
sum(DsRace_150kplus_shares2020$n) # 
DsRace_150kplus_shares2020[1,2]/sum(DsRace_150kplus_shares2020$n) # 78.9%
DsRace_150kplus_shares2020[2,2]/sum(DsRace_150kplus_shares2020$n) # 8.1%
DsRace_150kplus_shares2020[3,2]/sum(DsRace_150kplus_shares2020$n) # 6.7% 
DsRace_150kplus_shares2020[4,2]/sum(DsRace_150kplus_shares2020$n) # 6.2% # sum = 99.9
78.9 + 8.1 + 6.7 + 6.2

# plot
plot_DsRace_150kplus <- 
  ggplot() +
  geom_line(data = DsRace_150kplus, mapping = aes(x = year, y = mean_prez_D, color = as.factor(race))) +
  geom_point(data = DsRace_150kplus, mapping = aes(x = year, y = mean_prez_D, color = as.factor(race))) +
  scale_color_brewer(palette="Set1") +
  ylab("Share voting for Dem. candidate") + xlab("Year") +
  labs(title = "$150,000+ Family Income: Dem. Prez Voting by Race/Ethnicity",
       subtitle = "%s on chart = Group's share of total in 2008 or 2020 (CCES, 2008-20)") +
  theme_bw() +
  geom_hline(yintercept = 0.5, color = "purple") +
  scale_y_continuous(breaks = seq(0, 1.0, by = 0.05), labels = scales::percent_format(accuracy=1), 
                     limits = c(0.35, 0.95)) +
  scale_x_continuous(breaks = seq(2006, 2020, by = 2), limits = c(2008, 2020)) +
  scale_color_discrete(name="Race/ethnicity", labels=c("White", "Black", 
                                                       "Hispanic", "Asian")) +
  annotate("text", x = 2008.4, y = .37, label = "85.8%", size = 4.5, color = "#F8766D", fontface = "bold") +
  annotate("text", x = 2008.4, y = .67, label = "6.4%", size = 4.5, color = "#7CAE00", fontface = "bold") +
  annotate("text", x = 2008.4, y = .53, label = "5.1%", size = 4.5, color = "#00BFC4", fontface = "bold") +
  annotate("text", x = 2008.4, y = .45, label = "2.7%", size = 4.5, color = "#C77CFF", fontface = "bold") +
  annotate("text", x = 2019.6, y = .48, label = "78.9%", size = 4.5, color = "#F8766D", fontface = "bold") +
  annotate("text", x = 2019.6, y = .92, label = "8.1%", size = 4.5, color = "#7CAE00", fontface = "bold") +
  annotate("text", x = 2019.6, y = .63, label = "6.7%", size = 4.5, color = "#00BFC4", fontface = "bold") +
  annotate("text", x = 2019.6, y = .77, label = "6.2%", size = 4.5, color = "#C77CFF", fontface = "bold") +
  theme(legend.position = "bottom") +
  guides(color=guide_legend(nrow=2, byrow=TRUE))

ggsave("plot_DsRace_150kplus.jpg", plot_DsRace_150kplus)

############################ bottom center - $150k+ by Population density

NCHSURCodes2013$county_fips <- NCHSURCodes2013$FIPS.code

cces_countycodes <- merge(cces, NCHSURCodes2013, by.x = "county_fips")

cces_countycodes_DVs <- cces_countycodes %>% mutate(
  PID_D = ifelse(pid7 == 1 | pid7 == 2 | pid7 == 3, 1, 0),
  PID_R = ifelse(pid7 == 5 | pid7 == 6 | pid7 == 7, 1, 0),
  prez_D = ifelse(voted_pres_party == 1, 1, 0),
  prez_R = ifelse(voted_pres_party == 2, 1, 0)
)

Dsbycountycode_150kplus <- cces_countycodes_DVs %>% 
  filter(voted_pres_party < 4 & faminc == 12) %>% as_survey(weights = c(weight)) %>% group_by(X2013.code, year) %>% 
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T),
            mean_PID_D = survey_mean(PID_D, na.rm = T), mean_PID_R = survey_mean(PID_R, na.rm = T))

Dsbycountycode_150kplus <- Dsbycountycode_150kplus %>% 
  filter(year == 2008 | year == 2012 | year == 2016 | year == 2020)
Dsbycountycode_150kplus %>% print(n = Inf)

nrow(cces_countycodes_DVs) # 460k
cces_countycodes_DVs %>% filter(faminc == 12) %>% nrow() # 25k
cces_countycodes_DVs %>% filter(faminc == 12 & year == 2008) %>% nrow() # 1564

# check share in each category in 2008, 2020
Dsbycountycode_150kplus_shares2008 <- cces_countycodes_DVs %>% filter(faminc == 12 & year == 2008) %>% 
  as_survey(weights = c(weight)) %>% group_by(X2013.code) %>%
  survey_tally()
sum(Dsbycountycode_150kplus_shares2008$n) # 1349
Dsbycountycode_150kplus_shares2008[1,2]/sum(Dsbycountycode_150kplus_shares2008$n) # 30.2%
Dsbycountycode_150kplus_shares2008[2,2]/sum(Dsbycountycode_150kplus_shares2008$n) # 39.4%
Dsbycountycode_150kplus_shares2008[3,2]/sum(Dsbycountycode_150kplus_shares2008$n) # 17.3%
Dsbycountycode_150kplus_shares2008[4,2]/sum(Dsbycountycode_150kplus_shares2008$n) # 5.7%
Dsbycountycode_150kplus_shares2008[5,2]/sum(Dsbycountycode_150kplus_shares2008$n) # 4.5%
Dsbycountycode_150kplus_shares2008[6,2]/sum(Dsbycountycode_150kplus_shares2008$n) # 3.0% # sum = 100.1

Dsbycountycode_150kplus_shares2020 <- cces_countycodes_DVs %>% filter(faminc == 12 & year == 2020) %>% 
  as_survey(weights = c(weight)) %>% group_by(X2013.code) %>%
  survey_tally()
sum(Dsbycountycode_150kplus_shares2020$n) # 13266
Dsbycountycode_150kplus_shares2020[1,2]/sum(Dsbycountycode_150kplus_shares2020$n) # 32.1%
Dsbycountycode_150kplus_shares2020[2,2]/sum(Dsbycountycode_150kplus_shares2020$n) # 38.9%
Dsbycountycode_150kplus_shares2020[3,2]/sum(Dsbycountycode_150kplus_shares2020$n) # 15.5%
Dsbycountycode_150kplus_shares2020[4,2]/sum(Dsbycountycode_150kplus_shares2020$n) # 7.4%
Dsbycountycode_150kplus_shares2020[5,2]/sum(Dsbycountycode_150kplus_shares2020$n) # 3.8%
Dsbycountycode_150kplus_shares2020[6,2]/sum(Dsbycountycode_150kplus_shares2020$n) # 2.2% # sum = 99.9
32.1+38.9+15.5+7.4+3.8+2.2

# plot
plot_Dsbycountycode_150kplus <- 
  ggplot() +
  geom_point(data = Dsbycountycode_150kplus, mapping = aes(x = year, y = mean_prez_D, color = as.factor(X2013.code))) +
  geom_line(data = Dsbycountycode_150kplus, mapping = aes(x = year, y = mean_prez_D, color = as.factor(X2013.code))) +
  scale_color_brewer(palette="Set1") +
  ylab("Share voting for Dem. candidate") + xlab("Year") +
  labs(title = "$150,000+ Family Income: Dem. Prez Voting by County Type",
       subtitle = "%s on chart = Group's share of total in 2008 or 2020 (CCES, 2008-20)") +
  theme_bw() +
  geom_hline(yintercept = 0.5, color = "purple") +
  scale_y_continuous(breaks = seq(0, 1.0, by = 0.05), labels = scales::percent_format(accuracy=1), 
                     limits = c(0.2, 0.7)) +
  scale_x_continuous(breaks = seq(2006, 2020, by = 2), limits = c(2008, 2020)) +
  scale_color_discrete(name="County type", labels=c("Central metro\n(MSA pop 1m+)", "Fringe metro\n(in MSA 1m+)", 
                                                    "Medium metro\n(250k-1m)", "Small metro\n(50-250k)", 
                                                    "Micropolitan", "Non-core")) +
  annotate("text", x = 2008.4, y = .7, label = "30.2%", size = 4.5, color = "#F8766D", fontface = "bold") +
  annotate("text", x = 2008.4, y = .665, label = "39.4%", size = 4.5, color = "#BB9D00", fontface = "bold") +
  annotate("text", x = 2008.4, y = .63, label = "17.3%", size = 4.5, color = "#00B81F", fontface = "bold") +
  annotate("text", x = 2008.4, y = .595, label = "5.7%", size = 4.5, color = "#00C0B8", fontface = "bold") +
  annotate("text", x = 2008.4, y = .56, label = "4.5%", size = 4.5, color = "#00A5FF", fontface = "bold") +
  annotate("text", x = 2008.4, y = .525, label = "3.0%", size = 4.5, color = "#E76BF3", fontface = "bold") +
  annotate("text", x = 2019.6, y = .68, label = "32.1%", size = 4.5, color = "#F8766D", fontface = "bold") +
  annotate("text", x = 2019.6, y = .60, label = "38.9%", size = 4.5, color = "#BB9D00", fontface = "bold") +
  annotate("text", x = 2019.6, y = .53, label = "15.5%", size = 4.5, color = "#00B81F", fontface = "bold") +
  annotate("text", x = 2019.6, y = .46, label = "7.4%", size = 4.5, color = "#00C0B8", fontface = "bold") +
  annotate("text", x = 2019.6, y = .39, label = "3.8%", size = 4.5, color = "#00A5FF", fontface = "bold") +
  annotate("text", x = 2019.6, y = .34, label = "2.2%", size = 4.5, color = "#E76BF3", fontface = "bold") +
  theme(legend.position = "bottom") +
  guides(color=guide_legend(nrow=2, byrow=TRUE))

ggsave("plot_Dsbycountycode_150kplus.jpg", plot_Dsbycountycode_150kplus)

#################################################### Figure 5 ####################################################

############################ left - ANES regressions

ANES_prez_reg <- ANES %>% filter(VCF0705 == "1. Democrat" | VCF0705 == "2. Republican"
                                 | VCF0705 == "3. Other (incl. 3d/minor party candidates and write-ins)") %>%
  mutate(prez_vote = ifelse(VCF0705 == '1. Democrat', 2,
                            ifelse(VCF0705=='2. Republican', 1, 0)),
         income = ifelse(VCF0114=='5. 96 to 100 percentile', 5,
                         ifelse(VCF0114== '4. 68 to 95 percentile', 4,
                                ifelse(VCF0114=='3. 34 to 67 percentile', 3,
                                       ifelse(VCF0114=='2. 17 to 33 percentile', 2,
                                              ifelse(VCF0114=='1. 0 to 16 percentile', 1, NA))))),
         incometop5 = ifelse(VCF0114=='5. 96 to 100 percentile', 1, 
                             ifelse(VCF0114== '4. 68 to 95 percentile'|VCF0114=='3. 34 to 67 percentile'|
                                      VCF0114=='2. 17 to 33 percentile' | VCF0114=='1. 0 to 16 percentile', 0, NA)),
         incometopthird = ifelse(VCF0114=='5. 96 to 100 percentile'|VCF0114== '4. 68 to 95 percentile', 1,
                                 ifelse(VCF0114=='3. 34 to 67 percentile'|
                                          VCF0114=='2. 17 to 33 percentile' | VCF0114=='1. 0 to 16 percentile', 0, NA)),
         year = VCF0004)

ANES_20_prez_reg <- ANES_20 %>% filter(V202110x > 0) %>%
  mutate(prez_vote = ifelse(V202110x == 1, 2, 
                            ifelse(V202110x == 2, 1, NA)),
         income = ifelse(V201617x>21, 5,
                         ifelse(V201617x>17 & V201617x<22, 4,
                                ifelse(V201617x > 8 & V201617x < 17, 3,
                                       ifelse(V201617x > 4 & V201617x < 9, 2,
                                              ifelse(V201617x < 5, 1, NA))))),
         incometop5 = ifelse(V201617x>21, 1,
                             ifelse(V201617x<22, 0, NA)),
         incometopthird = ifelse(V201617x>17, 1,
                                 ifelse(V201617x<18, 0, NA)),
         year = 2020)

ANES_prez_regmore <- ANES_prez_reg %>%
  mutate(college = ifelse(VCF0140=="1. 8 grades or less ('grade school')"|
                            VCF0140=="2. 9-12 grades ('high school'), no diploma/equivalency"|
                            VCF0140=="3. 12 grades, diploma or equivalency"|
                            VCF0140=="4. 12 grades, diploma or equivalency plus non-academic"|
                            VCF0140=="5. Some college, no degree; junior/community college", 0,
                          ifelse(VCF0140=="6. BA level degrees; advanced degrees incl. LLB", 1, NA)),
         white = ifelse(VCF0106=="2. Black non-Hispanic"|VCF0106=="3. Other", 0,
                        ifelse(VCF0106=="1. White non-Hispanic", 1, NA)),
         female = ifelse(VCF0104=="1. Male", 0,
                         ifelse(VCF0104=="2. Female", 1, NA)))

ANES_20_prez_regmore <- ANES_20_prez_reg %>% 
  mutate(college = ifelse(V201511x>0 & V201511x<4, 0, 
                          ifelse(V201511x>3, 1, NA)),
         white = ifelse(V201549x>1, 0,
                        ifelse(V201549x==1, 1, NA)),
         female = ifelse(V201600==2, 1,
                         ifelse(V201600==1, 0, NA)))

ANES_prez_regmore2 <- ANES_prez_regmore %>% select(prez_vote, income, incometop5, incometopthird,year, college, white, female)
ANES_20_prez_regmore2 <- ANES_20_prez_regmore %>% select(prez_vote, income,incometop5, incometopthird,year,college,white,
                                                         female)

ANES_prez_bothmore <- rbind(ANES_prez_regmore2, ANES_20_prez_regmore2)

ANES_prez_regmore_2020 <- ANES_prez_bothmore %>% filter(year==2020)
ANES_prez_regmore_2016 <- ANES_prez_bothmore %>% filter(year==2016)
ANES_prez_regmore_2012 <- ANES_prez_bothmore %>% filter(year==2012)
ANES_prez_regmore_2008 <- ANES_prez_bothmore %>% filter(year==2008)
ANES_prez_regmore_2004 <- ANES_prez_bothmore %>% filter(year==2004)
ANES_prez_regmore_2000 <- ANES_prez_bothmore %>% filter(year==2000)
ANES_prez_regmore_1996 <- ANES_prez_bothmore %>% filter(year==1996)
ANES_prez_regmore_1992 <- ANES_prez_bothmore %>% filter(year==1992)
ANES_prez_regmore_1988 <- ANES_prez_bothmore %>% filter(year==1988)
ANES_prez_regmore_1984 <- ANES_prez_bothmore %>% filter(year==1984)
ANES_prez_regmore_1980 <- ANES_prez_bothmore %>% filter(year==1980)
ANES_prez_regmore_1976 <- ANES_prez_bothmore %>% filter(year==1976)
ANES_prez_regmore_1972 <- ANES_prez_bothmore %>% filter(year==1972)
ANES_prez_regmore_1968 <- ANES_prez_bothmore %>% filter(year==1968)

lm_ANES_prez_regmore_2020 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_2020)
lm_ANES_prez_regmore_2016 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_2016)
lm_ANES_prez_regmore_2012 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_2012)
lm_ANES_prez_regmore_2008 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_2008)
lm_ANES_prez_regmore_2004 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_2004)
lm_ANES_prez_regmore_2000 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_2000)
lm_ANES_prez_regmore_1996 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_1996)
lm_ANES_prez_regmore_1992 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_1992)
lm_ANES_prez_regmore_1988 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_1988)
lm_ANES_prez_regmore_1984 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_1984)
lm_ANES_prez_regmore_1980 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_1980)
lm_ANES_prez_regmore_1976 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_1976)
lm_ANES_prez_regmore_1972 <- lm_robust(prez_vote ~ incometop5 + college + white + female, data = ANES_prez_regmore_1972)

lm_ANES_prez_regmore_2020top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_2020)
lm_ANES_prez_regmore_2016top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_2016)
lm_ANES_prez_regmore_2012top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_2012)
lm_ANES_prez_regmore_2008top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_2008)
lm_ANES_prez_regmore_2004top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_2004)
lm_ANES_prez_regmore_2000top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_2000)
lm_ANES_prez_regmore_1996top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_1996)
lm_ANES_prez_regmore_1992top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_1992)
lm_ANES_prez_regmore_1988top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_1988)
lm_ANES_prez_regmore_1984top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_1984)
lm_ANES_prez_regmore_1980top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_1980)
lm_ANES_prez_regmore_1976top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_1976)
lm_ANES_prez_regmore_1972top3rd <- lm_robust(prez_vote ~ incometopthird + college + white + female, 
                                             data = ANES_prez_regmore_1972)

# COEF PLOT:
ANES_prez_regsmore_df <- data.frame(
  estimate = c(lm_ANES_prez_regmore_1972$coefficients[2], lm_ANES_prez_regmore_1976$coefficients[2], 
               lm_ANES_prez_regmore_1980$coefficients[2], lm_ANES_prez_regmore_1984$coefficients[2], 
               lm_ANES_prez_regmore_1988$coefficients[2], lm_ANES_prez_regmore_1992$coefficients[2],
               lm_ANES_prez_regmore_1996$coefficients[2], lm_ANES_prez_regmore_2000$coefficients[2],
               lm_ANES_prez_regmore_2004$coefficients[2], lm_ANES_prez_regmore_2008$coefficients[2],
               lm_ANES_prez_regmore_2012$coefficients[2], lm_ANES_prez_regmore_2016$coefficients[2],
               lm_ANES_prez_regmore_2020$coefficients[2], 
               lm_ANES_prez_regmore_1972top3rd$coefficients[2], lm_ANES_prez_regmore_1976top3rd$coefficients[2], 
               lm_ANES_prez_regmore_1980top3rd$coefficients[2], lm_ANES_prez_regmore_1984top3rd$coefficients[2], 
               lm_ANES_prez_regmore_1988top3rd$coefficients[2], lm_ANES_prez_regmore_1992top3rd$coefficients[2],
               lm_ANES_prez_regmore_1996top3rd$coefficients[2], lm_ANES_prez_regmore_2000top3rd$coefficients[2],
               lm_ANES_prez_regmore_2004top3rd$coefficients[2], lm_ANES_prez_regmore_2008top3rd$coefficients[2],
               lm_ANES_prez_regmore_2012top3rd$coefficients[2], lm_ANES_prez_regmore_2016top3rd$coefficients[2],
               lm_ANES_prez_regmore_2020top3rd$coefficients[2]),
  conf.low = c(lm_ANES_prez_regmore_1972$conf.low[2], lm_ANES_prez_regmore_1976$conf.low[2], 
               lm_ANES_prez_regmore_1980$conf.low[2], lm_ANES_prez_regmore_1984$conf.low[2], 
               lm_ANES_prez_regmore_1988$conf.low[2], lm_ANES_prez_regmore_1992$conf.low[2],
               lm_ANES_prez_regmore_1996$conf.low[2], lm_ANES_prez_regmore_2000$conf.low[2],
               lm_ANES_prez_regmore_2004$conf.low[2], lm_ANES_prez_regmore_2008$conf.low[2],
               lm_ANES_prez_regmore_2012$conf.low[2], lm_ANES_prez_regmore_2016$conf.low[2],
               lm_ANES_prez_regmore_2020$conf.low[2],
               lm_ANES_prez_regmore_1972top3rd$conf.low[2], lm_ANES_prez_regmore_1976top3rd$conf.low[2], 
               lm_ANES_prez_regmore_1980top3rd$conf.low[2], lm_ANES_prez_regmore_1984top3rd$conf.low[2], 
               lm_ANES_prez_regmore_1988top3rd$conf.low[2], lm_ANES_prez_regmore_1992top3rd$conf.low[2],
               lm_ANES_prez_regmore_1996top3rd$conf.low[2], lm_ANES_prez_regmore_2000top3rd$conf.low[2],
               lm_ANES_prez_regmore_2004top3rd$conf.low[2], lm_ANES_prez_regmore_2008top3rd$conf.low[2],
               lm_ANES_prez_regmore_2012top3rd$conf.low[2], lm_ANES_prez_regmore_2016top3rd$conf.low[2],
               lm_ANES_prez_regmore_2020top3rd$conf.low[2]),
  conf.high = c(lm_ANES_prez_regmore_1972$conf.high[2], lm_ANES_prez_regmore_1976$conf.high[2], 
                lm_ANES_prez_regmore_1980$conf.high[2], lm_ANES_prez_regmore_1984$conf.high[2], 
                lm_ANES_prez_regmore_1988$conf.high[2], lm_ANES_prez_regmore_1992$conf.high[2],
                lm_ANES_prez_regmore_1996$conf.high[2], lm_ANES_prez_regmore_2000$conf.high[2],
                lm_ANES_prez_regmore_2004$conf.high[2], lm_ANES_prez_regmore_2008$conf.high[2],
                lm_ANES_prez_regmore_2012$conf.high[2], lm_ANES_prez_regmore_2016$conf.high[2],
                lm_ANES_prez_regmore_2020$conf.high[2],
                lm_ANES_prez_regmore_1972top3rd$conf.high[2], lm_ANES_prez_regmore_1976top3rd$conf.high[2], 
                lm_ANES_prez_regmore_1980top3rd$conf.high[2], lm_ANES_prez_regmore_1984top3rd$conf.high[2], 
                lm_ANES_prez_regmore_1988top3rd$conf.high[2], lm_ANES_prez_regmore_1992top3rd$conf.high[2],
                lm_ANES_prez_regmore_1996top3rd$conf.high[2], lm_ANES_prez_regmore_2000top3rd$conf.high[2],
                lm_ANES_prez_regmore_2004top3rd$conf.high[2], lm_ANES_prez_regmore_2008top3rd$conf.high[2],
                lm_ANES_prez_regmore_2012top3rd$conf.high[2], lm_ANES_prez_regmore_2016top3rd$conf.high[2],
                lm_ANES_prez_regmore_2020top3rd$conf.high[2]),
  Treatment = c("1972","1976","1980","1984","1988","1992","1996","2000","2004","2008","2012","2016","2020",
                "1972","1976","1980","1984","1988","1992","1996","2000","2004","2008","2012","2016","2020"),
  IncomeType = c("Top 5%","Top 5%","Top 5%","Top 5%","Top 5%","Top 5%","Top 5%","Top 5%","Top 5%","Top 5%",
                 "Top 5%","Top 5%","Top 5%",
                 "Top 33%","Top 33%","Top 33%","Top 33%","Top 33%","Top 33%","Top 33%","Top 33%","Top 33%",
                 "Top 33%","Top 33%","Top 33%","Top 33%"))

positions_ANES_prez_regsmore <- c("1972","1976","1980","1984","1988","1992","1996","2000","2004","2008","2012","2016","2020")

plot_coef_ANES_prez_regsmore <- 
  ggplot(ANES_prez_regsmore_df, mapping = aes(x = estimate, y = Treatment, color = IncomeType),
         position = position_dodge(width = 0.6)) +
  geom_point(position = position_dodge(width = 0.6)) +
  geom_errorbar(mapping = aes(xmin = conf.low, xmax = conf.high), width = 0.3, position = position_dodge(width = 0.6)) +
  geom_vline(xintercept = 0, color = 'red') +
  labs(title = "Correlations between High-Income & Presidential Vote (ANES)",
       subtitle = "CONTROLS: education, race, gender (DV: Dem. prez vote)",
       x = "Estimate & 95% CI", y = "Election year") +
  scale_x_continuous(limits = c(-0.4, 0.15), breaks = seq(-0.4, 0.15, by = 0.1)) +
  scale_y_discrete(limits = positions_ANES_prez_regsmore) +
  theme(legend.position = "bottom") +
  guides(color=guide_legend(nrow=1, byrow=TRUE, title="Income group"))

ggsave(filename = "plot_coef_ANES_prez_regsmore.jpg", plot = plot_coef_ANES_prez_regsmore)

############################ right - CES regressions

cces_newregs <- cces %>% mutate(Dprez = ifelse(voted_pres_party==1, 1, 0),
                            Rprez = ifelse(voted_pres_party==2, 1, 0),
                            gov_D = ifelse(voted_gov_party == 1, 1, 0),
                            gov_R = ifelse(voted_gov_party == 2, 1, 0),
                            rep_D = ifelse(voted_rep_party == 1, 1, 0),
                            rep_R = ifelse(voted_rep_party == 2, 1, 0))

cces_newregs_add <- cces_newregs %>% 
  mutate(college = ifelse(educ>4, 1, 
                          ifelse(educ<5, 0, NA)),
         white = ifelse(race==1, 1,
                        ifelse(race>1 & race<7, 0, NA)),
         female = ifelse(gender==2, 1,
                         ifelse(gender==1, 0, NA)),
         income150k = ifelse(faminc==12,1,
                             ifelse(faminc<12, 0, NA)),
         income100k = ifelse(faminc==12|faminc==11|faminc==10,1,
                             ifelse(faminc<10, 0, NA)),
         income80k = ifelse(faminc==12|faminc==11|faminc==10|faminc==9|faminc==8,1,
                            ifelse(faminc<10, 0, NA)))

cces_regs2 <- cces_newregs_add %>% select(Dprez, income150k, income100k, income80k, college, white,female, year)

cces_regs2_2020 <- cces_regs2 %>% filter(year==2020)
cces_regs2_2016 <- cces_regs2 %>% filter(year==2016)
cces_regs2_2012 <- cces_regs2 %>% filter(year==2012)
cces_regs2_2008 <- cces_regs2 %>% filter(year==2008)

lm_cces_regs2_2020 <- lm_robust(Dprez ~ income150k + college + white + female, data = cces_regs2_2020) 
lm_cces_regs2_2016 <- lm_robust(Dprez ~ income150k + college + white + female, data = cces_regs2_2016)
lm_cces_regs2_2012 <- lm_robust(Dprez ~ income150k + college + white + female, data = cces_regs2_2012)
lm_cces_regs2_2008 <- lm_robust(Dprez ~ income150k + college + white + female, data = cces_regs2_2008)

lm_cces_regs2_2020b <- lm_robust(Dprez ~ income100k + college + white + female, data = cces_regs2_2020) 
lm_cces_regs2_2016b <- lm_robust(Dprez ~ income100k + college + white + female, data = cces_regs2_2016)
lm_cces_regs2_2012b <- lm_robust(Dprez ~ income100k + college + white + female, data = cces_regs2_2012)
lm_cces_regs2_2008b <- lm_robust(Dprez ~ income100k + college + white + female, data = cces_regs2_2008)

lm_cces_regs2_2020c <- lm_robust(Dprez ~ income80k + college + white + female, data = cces_regs2_2020) 
lm_cces_regs2_2016c <- lm_robust(Dprez ~ income80k + college + white + female, data = cces_regs2_2016)
lm_cces_regs2_2012c <- lm_robust(Dprez ~ income80k + college + white + female, data = cces_regs2_2012)
lm_cces_regs2_2008c <- lm_robust(Dprez ~ income80k + college + white + female, data = cces_regs2_2008)

# COEF PLOT:
CCES_prez_regs_df <- data.frame(
  estimate = c(lm_cces_regs2_2008$coefficients[2], lm_cces_regs2_2012$coefficients[2], 
               lm_cces_regs2_2016$coefficients[2], lm_cces_regs2_2020$coefficients[2], 
               lm_cces_regs2_2020b$coefficients[2], lm_cces_regs2_2016b$coefficients[2], 
               lm_cces_regs2_2012b$coefficients[2], lm_cces_regs2_2008b$coefficients[2],
               lm_cces_regs2_2020c$coefficients[2], lm_cces_regs2_2016c$coefficients[2], 
               lm_cces_regs2_2012c$coefficients[2], lm_cces_regs2_2008c$coefficients[2]),
  conf.low = c(lm_cces_regs2_2008$conf.low[2], lm_cces_regs2_2012$conf.low[2], 
               lm_cces_regs2_2016$conf.low[2], lm_cces_regs2_2020$conf.low[2], 
               lm_cces_regs2_2020b$conf.low[2], lm_cces_regs2_2016b$conf.low[2],
               lm_cces_regs2_2012b$conf.low[2], lm_cces_regs2_2008b$conf.low[2],
               lm_cces_regs2_2020c$conf.low[2], lm_cces_regs2_2016c$conf.low[2],
               lm_cces_regs2_2012c$conf.low[2], lm_cces_regs2_2008c$conf.low[2]),
  conf.high = c(lm_cces_regs2_2008$conf.high[2], lm_cces_regs2_2012$conf.high[2], 
                lm_cces_regs2_2016$conf.high[2], lm_cces_regs2_2020$conf.high[2], 
                lm_cces_regs2_2020b$conf.high[2], lm_cces_regs2_2016b$conf.high[2],
                lm_cces_regs2_2012b$conf.high[2], lm_cces_regs2_2008b$conf.high[2],
                lm_cces_regs2_2020c$conf.high[2], lm_cces_regs2_2016c$conf.high[2],
                lm_cces_regs2_2012c$conf.high[2], lm_cces_regs2_2008c$conf.high[2]),
  Treatment = c("2008","2012","2016","2020","2008","2012","2016","2020","2008","2012","2016","2020"),
  IncomeType = c("$150,000+ (Top 10-15%)","$150,000+ (Top 10-15%)","$150,000+ (Top 10-15%)","$150,000+ (Top 10-15%)",
                 "$100,000+ (Top 20-30%)","$100,000+ (Top 20-30%)","$100,000+ (Top 20-30%)","$100,000+ (Top 20-30%)",
                 "$80,000+ (Top 30-40%)","$80,000+ (Top 30-40%)","$80,000+ (Top 30-40%)","$80,000+ (Top 30-40%)"))

positions_cces_prez_regs <- c("2008","2012","2016","2020")

plot_coef_cces_prez_regs <- 
  ggplot(CCES_prez_regs_df, mapping = aes(x = estimate, y = Treatment, color = IncomeType),
         position = position_dodge(width = 0.6)) +
  geom_point(position = position_dodge(width = 0.6)) +
  geom_errorbar(mapping = aes(xmin = conf.low, xmax = conf.high), width = 0.3, position = position_dodge(width = 0.6)) +
  geom_vline(xintercept = 0, color = 'red') +
  labs(title = "Correlations between High-Income & Presidential Vote (CES)",
       subtitle = "CONTROLS: education, race, gender (DV: Dem. prez vote)",
       x = "Estimate & 95% CI", y = "Election year") +
  scale_x_continuous(limits = c(-0.05, 0.05), breaks = seq(-0.05, 0.05, by = 0.02)) +
  scale_y_discrete(limits = positions_cces_prez_regs) +
  theme(legend.position = "bottom") +
  guides(color=guide_legend(nrow=3, byrow=TRUE, title="Income group"))

ggsave(filename = "plot_coef_cces_prez_regs.jpg", plot = plot_coef_cces_prez_regs)

# 



#################################################### APPENDIX ####################################################

############################ Figure 1 - McCarty et al replication and extension

# pid first
ANES_PID <- ANES %>% filter(VCF0301=='1. Strong Democrat' | VCF0301=='2. Weak Democrat' | 
                              VCF0301=='3. Independent - Democrat' | VCF0301=='4. Independent - Independent' |
                              VCF0301=='5. Independent - Republican' | VCF0301=='6. Weak Republican' | 
                              VCF0301=='7. Strong Republican') %>%
  mutate(PID_D = ifelse(VCF0301=='1. Strong Democrat' | VCF0301=='2. Weak Democrat' | 
                          VCF0301=='3. Independent - Democrat', 1, 0),
         PID_R = ifelse(VCF0301=='5. Independent - Republican' | VCF0301=='6. Weak Republican' | 
                          VCF0301=='7. Strong Republican', 1, 0))

ANES_PID_incomes_McC <- ANES_PID %>% 
  mutate(top_third = ifelse(VCF0114 == "4. 68 to 95 percentile" | VCF0114 == "5. 96 to 100 percentile", 1, 0),
         bottom_sixth = ifelse(VCF0114 == "1. 0 to 16 percentile", 1, 0))
ANES_PID_incomes_bottomsixth_McC <- ANES_PID_incomes_McC %>% filter(bottom_sixth==1) %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(VCF0004) %>%
  summarise(bottom_sixth_choosingR_mean = survey_mean(PID_R==1, na.rm = T))
ANES_PID_incomes_topthird_McC <- ANES_PID_incomes_McC %>% filter(top_third==1) %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(VCF0004) %>%
  summarise(top_third_choosingR_mean = survey_mean(PID_R==1, na.rm = T))
ANES_PID_incomes_topthird_McC <- ANES_PID_incomes_topthird_McC %>% 
  select(top_third_choosingR_mean, top_third_choosingR_mean_se)

ANES_PID_incomes_combined_McC <- cbind(ANES_PID_incomes_bottomsixth_McC, ANES_PID_incomes_topthird_McC)

# add 2020:
ANES_20_PID <- ANES_20 %>% filter(V201231x>0) %>% mutate(
  PID_D = ifelse(V201231x == 1 | V201231x == 2 | V201231x == 3, 1, 0),
  PID_R = ifelse(V201231x == 5 | V201231x == 6 | V201231x == 7, 1, 0)
)

ANES_20_PID_incomes_McC <- ANES_20_PID %>%
  mutate(top_third = ifelse(V201617x == 22 | V201617x == 21 | V201617x == 20 | V201617x == 19 | 
                              V201617x == 18 | V201617x == 17, 1, 0),
         bottom_sixth = ifelse(V201617x == 1 | V201617x == 2 | V201617x == 3 | V201617x == 4, 1, 0))
ANES_20_PID_incomes_McC_mo0 <- ANES_20_PID_incomes_McC %>% filter(V200010b>0)
ANES_20_PID_incomes_bottomsixth_McC <- ANES_20_PID_incomes_McC_mo0 %>% filter(bottom_sixth==1) %>%
  as_survey(weights = c(V200010b)) %>%
  summarise(bottom_sixth_choosingR_mean = survey_mean(PID_R==1, na.rm = T))
ANES_20_PID_incomes_topthird_McC <- ANES_20_PID_incomes_McC_mo0 %>% filter(top_third==1) %>%
  as_survey(weights = c(V200010b)) %>%
  summarise(top_third_choosingR_mean = survey_mean(PID_R==1, na.rm = T))

ANES_20_PID_bottom_top_combined_McC <- cbind(ANES_20_PID_incomes_bottomsixth_McC, ANES_20_PID_incomes_topthird_McC)
ANES_20_PID_bottom_top_combined_McC$VCF0004 <- 2020
ANES_20_PID_bottom_top_combined_McC <- ANES_20_PID_bottom_top_combined_McC %>% 
  select(VCF0004, bottom_sixth_choosingR_mean, bottom_sixth_choosingR_mean_se, 
         top_third_choosingR_mean, top_third_choosingR_mean_se)

PID_bottomtop_all_McC <- rbind(ANES_PID_incomes_combined_McC, ANES_20_PID_bottom_top_combined_McC)

PID_bottomtop_all_diffshare_McC <- PID_bottomtop_all_McC %>% 
  mutate(proportion_top_bottom_R_PID = top_third_choosingR_mean / bottom_sixth_choosingR_mean)

# prez vote
ANES_prez <- ANES %>% filter(VCF0705 == "1. Democrat" | VCF0705 == "2. Republican" | 
                               VCF0705 == "3. Other (incl. 3d/minor party candidates and write-ins)") %>%
  mutate(prez_D = ifelse(VCF0705 == '1. Democrat', 1, 0),
         prez_R = ifelse(VCF0705=='2. Republican', 1, 0))

ANES_20_prez <- ANES_20 %>% filter(V202110x > 0) %>%
  mutate(prez_D = ifelse(V202110x == 1, 1, 0),
    prez_R = ifelse(V202110x == 2, 1, 0))

ANES_prez_incomes_McC <- ANES_prez %>% 
  mutate(top_third = ifelse(VCF0114 == "4. 68 to 95 percentile" | VCF0114 == "5. 96 to 100 percentile", 1, 0),
         bottom_sixth = ifelse(VCF0114 == "1. 0 to 16 percentile", 1, 0))
ANES_prez_incomes_bottomsixth_McC <- ANES_prez_incomes_McC %>% filter(bottom_sixth==1) %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(VCF0004) %>%
  summarise(bottom_sixth_choosingR_mean = survey_mean(prez_R==1, na.rm = T))
ANES_prez_incomes_topthird_McC <- ANES_prez_incomes_McC %>% filter(top_third==1) %>%
  as_survey(weights = c(VCF0009z)) %>% group_by(VCF0004) %>%
  summarise(top_third_choosingR_mean = survey_mean(prez_R==1, na.rm = T))
ANES_prez_incomes_topthird_McC <- ANES_prez_incomes_topthird_McC %>% 
  select(top_third_choosingR_mean, top_third_choosingR_mean_se)

ANES_prez_incomes_combined_McC <- cbind(ANES_prez_incomes_bottomsixth_McC, ANES_prez_incomes_topthird_McC)

# add 2020:
ANES_20_prez_incomes_McC <- ANES_20_prez %>%
  mutate(top_third = ifelse(V201617x == 22 | V201617x == 21 | V201617x == 20 | V201617x == 19 | 
                              V201617x == 18 | V201617x == 17, 1, 0),
         bottom_sixth = ifelse(V201617x == 1 | V201617x == 2 | V201617x == 3 | V201617x == 4, 1, 0))
ANES_20_prez_incomes_McC_no0 <- ANES_20_prez_incomes_McC %>% filter(V200010b>0)
ANES_20_prez_incomes_bottomsixth_McC <- ANES_20_prez_incomes_McC_no0 %>% filter(bottom_sixth==1) %>%
  as_survey(weights = c(V200010b)) %>%
  summarise(bottom_sixth_choosingR_mean = survey_mean(prez_R==1, na.rm = T))
ANES_20_prez_incomes_topthird_McC <- ANES_20_prez_incomes_McC_no0 %>% filter(top_third==1) %>%
  as_survey(weights = c(V200010b)) %>%
  summarise(top_third_choosingR_mean = survey_mean(prez_R==1, na.rm = T))

ANES_20_prez_bottom_top_combined_McC <- cbind(ANES_20_prez_incomes_bottomsixth_McC, ANES_20_prez_incomes_topthird_McC)
ANES_20_prez_bottom_top_combined_McC$VCF0004 <- 2020
ANES_20_prez_bottom_top_combined_McC <- ANES_20_prez_bottom_top_combined_McC %>% 
  select(VCF0004, bottom_sixth_choosingR_mean, bottom_sixth_choosingR_mean_se, 
         top_third_choosingR_mean, top_third_choosingR_mean_se)

prez_bottomtop_all_McC <- rbind(ANES_prez_incomes_combined_McC, ANES_20_prez_bottom_top_combined_McC)

prez_bottomtop_all_diffshare_McC <- prez_bottomtop_all_McC %>% 
  mutate(proportion_top_bottom_R_prez = top_third_choosingR_mean / bottom_sixth_choosingR_mean)

# plot
McCarty_topthirdbottomsixth_ratio_PIDprez_plot <- 
  ggplot() + 
  geom_line(PID_bottomtop_all_diffshare_McC, mapping = aes(x = VCF0004, y = proportion_top_bottom_R_PID, 
                                                           color = "darksalmon")) +
  geom_line(prez_bottomtop_all_diffshare_McC, mapping = aes(x = VCF0004, y = proportion_top_bottom_R_prez, 
                                                            color = "red1")) +
  geom_point(PID_bottomtop_all_diffshare_McC, mapping = aes(x = VCF0004, y = proportion_top_bottom_R_PID, 
                                                            color = "darksalmon")) +
  geom_point(prez_bottomtop_all_diffshare_McC, mapping = aes(x = VCF0004, y = proportion_top_bottom_R_prez, 
                                                             color = "red1")) +
  ylab("High-income Republican tilt (% Top 1/3 / % Bottom 1/6)") + xlab("Year") + 
  labs(title = "Republican PID & Prez Vote 'Stratification' by Income",
       subtitle = "Dotted line: End of McCarty et al (2006) analysis (ANES)") +
  theme(legend.position = "bottom") +
  theme_bw() +
  geom_hline(yintercept = 1, color = "purple") +
  geom_vline(xintercept = 2002, color = "black", linetype = "dashed") +
  scale_y_continuous(breaks = seq(0.9, 3.0, by = 0.3), 
                     limits = c(0.9, 2.7)) +
  scale_x_continuous(breaks = seq(1952, 2020, by = 4), limits = c(1952, 2020)) +
  scale_color_identity(name = "Behavior",
                       breaks = c("darksalmon", "red1"),
                       labels = c("Party ID", "Prez vote"),
                       guide = "legend") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

ggsave(filename = "McCartyAPPX_topthirdbottomsixth_ratio_PIDprez_plot.jpg", 
       plot = McCarty_topthirdbottomsixth_ratio_PIDprez_plot)

# 

############################ Figure 2 - CES governor voting

prez_Ushape_byyearCCESguv <- 
  ggplot() + 
  geom_point(CCES_80121620_weightprez_CALC_20, mapping = aes(x = income_cat, y = DoverRguv), color = "black") +
  geom_line(CCES_80121620_weightprez_CALC_20, mapping = aes(x = income_cat, y = DoverRguv, group = year), 
            color = "black") +
  geom_point(CCES_80121620_weightprez_CALC_16, mapping = aes(x = income_cat, y = DoverRguv), color = "purple3") +
  geom_line(CCES_80121620_weightprez_CALC_16, mapping = aes(x = income_cat, y = DoverRguv, group = year), 
            color = "purple3") +
  geom_point(CCES_80121620_weightprez_CALC_12, mapping = aes(x = income_cat, y = DoverRguv), color = "hotpink4") +
  geom_line(CCES_80121620_weightprez_CALC_12, mapping = aes(x = income_cat, y = DoverRguv, group = year), 
            color = "hotpink4") +
  geom_point(CCES_80121620_weightprez_CALC_08, mapping = aes(x = income_cat, y = DoverRguv), 
             color = "magenta3") +
  geom_line(CCES_80121620_weightprez_CALC_08, mapping = aes(x = income_cat, y = DoverRguv, group = year), 
            color = "magenta3") +
  annotate("text", x = "80-100%", y = -.03, label = "2020", size = 4.5, color = "black", fontface = "bold") +
  annotate("text", x = "80-100%", y = .115, label = "2016", size = 4.5, color = "purple3", fontface = "bold") +
  annotate("text", x = "80-100%", y = .015, label = "2012", size = 4.5, color = "hotpink4", fontface = "bold") +
  annotate("text", x = "80-100%", y = .065, label = "2008", size = 4.5, color = "magenta3", fontface = "bold") +
  ylab("Dem. prez vote % - Repub. prez vote %") + xlab("Income group") + 
  labs(title = "Democratic Governor Vote Margin by Income Groups",
       subtitle = "CCES (2008-2020)") +
  theme(legend.position = "bottom") +
  theme_bw() +
  geom_hline(yintercept = 0, color = "purple") +
  scale_y_continuous(breaks = seq(-1.0, 1.0, by = 0.05), labels = scales::percent_format(accuracy = 1), 
                     limits = c(-0.15, 0.2)) 

ggsave(filename = "prez_Ushape_byyearCCESguv.jpg", plot = prez_Ushape_byyearCCESguv)

# 

############################ Figure 3 - CES House voting

prez_Ushape_byyearCCEShouse <- 
  ggplot() + 
  geom_point(CCES_80121620_weightprez_CALC_20, mapping = aes(x = income_cat, y = DoverRhouse), color = "black") +
  geom_line(CCES_80121620_weightprez_CALC_20, mapping = aes(x = income_cat, y = DoverRhouse, group = year), 
            color = "black") +
  geom_point(CCES_80121620_weightprez_CALC_16, mapping = aes(x = income_cat, y = DoverRhouse), color = "purple3") +
  geom_line(CCES_80121620_weightprez_CALC_16, mapping = aes(x = income_cat, y = DoverRhouse, group = year), 
            color = "purple3") +
  geom_point(CCES_80121620_weightprez_CALC_12, mapping = aes(x = income_cat, y = DoverRhouse), color = "hotpink4") +
  geom_line(CCES_80121620_weightprez_CALC_12, mapping = aes(x = income_cat, y = DoverRhouse, group = year), 
            color = "hotpink4") +
  geom_point(CCES_80121620_weightprez_CALC_08, mapping = aes(x = income_cat, y = DoverRhouse), 
             color = "magenta3") +
  geom_line(CCES_80121620_weightprez_CALC_08, mapping = aes(x = income_cat, y = DoverRhouse, group = year), 
            color = "magenta3") +
  annotate("text", x = "80-100%", y = .14, label = "2020", size = 4.5, color = "black", fontface = "bold") +
  annotate("text", x = "80-100%", y = .08, label = "2016", size = 4.5, color = "purple3", fontface = "bold") +
  annotate("text", x = "80-100%", y = .03, label = "2012", size = 4.5, color = "hotpink4", fontface = "bold") +
  annotate("text", x = "80-100%", y = -.01, label = "2008", size = 4.5, color = "magenta3", fontface = "bold") +
  ylab("Dem. prez vote % - Repub. prez vote %") + xlab("Income group") + 
  labs(title = "Democratic U.S. House Vote Margin by Income Groups",
       subtitle = "CCES (2008-2020)") +
  theme(legend.position = "bottom") +
  theme_bw() +
  geom_hline(yintercept = 0, color = "purple") +
  scale_y_continuous(breaks = seq(-1.0, 1.0, by = 0.05), labels = scales::percent_format(accuracy = 1), 
                     limits = c(-0.05, 0.25)) 

ggsave(filename = "prez_Ushape_byyearCCEShouse.jpg", plot = prez_Ushape_byyearCCEShouse)

# 

############################ Figure 4 - GSS voting by wealth

gss_wealthcats <- gss_PIDprez %>% mutate(wealth_new = ifelse(wealth < 4, 1, # negative-40k
                                                             ifelse(wealth > 3 & wealth < 6, 2, # 40-100k
                                                                    ifelse(wealth > 5 & wealth < 8, 3, # 100-250k
                                                                           ifelse(wealth==8, 4, # 250k-500k
                                                                                  ifelse(wealth == 9, 5, #500k-1m
                                                                                         ifelse(wealth>9, 6, NA))))))) # 1m+ 

gss_wealth6_prez <- gss_wealthcats %>% filter(wealth_new == 6) %>% as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_wealth6_prez <- gss_wealth6_prez %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_wealth5_prez <- gss_wealthcats %>% filter(wealth_new == 5) %>% as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_wealth5_prez <- gss_wealth5_prez %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_wealth4_prez <- gss_wealthcats %>% filter(wealth_new == 4) %>% as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_wealth4_prez <- gss_wealth4_prez %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_wealth3_prez <- gss_wealthcats %>% filter(wealth_new == 3) %>% as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_wealth3_prez <- gss_wealth3_prez %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_wealth2_prez <- gss_wealthcats %>% filter(wealth_new == 2) %>% as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_wealth2_prez <- gss_wealth2_prez %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_wealth1_prez <- gss_wealthcats %>% filter(wealth_new == 1) %>% as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_wealth1_prez <- gss_wealth1_prez %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_wealth6_prez2 <- gss_wealth6_prez %>% mutate(group="$1m+")
gss_wealth5_prez2 <- gss_wealth5_prez %>% mutate(group="$500k-1m")
gss_wealth4_prez2 <- gss_wealth4_prez %>% mutate(group="$250-500k")
gss_wealth3_prez2 <- gss_wealth3_prez %>% mutate(group="$100-250k")
gss_wealth2_prez2 <- gss_wealth2_prez %>% mutate(group="$40-100k")
gss_wealth1_prez2 <- gss_wealth1_prez %>% mutate(group="$>$0-40k")
gss_wealth_prezALL <- rbind(gss_wealth6_prez2, gss_wealth5_prez2, gss_wealth4_prez2,
                            gss_wealth3_prez2, gss_wealth2_prez2, gss_wealth1_prez2)

plot_wealth_prezALL <- 
  ggplot(data = gss_wealth_prezALL, mapping = aes(year, prezDshare, color = group)) +
  geom_line(data = gss_wealth_prezALL, mapping = aes(year, prezDshare, color = group), alpha = 0.4) +
  geom_point(size = 1.0, alpha = 0.4) +
  geom_smooth(method = "lm", se = FALSE) +
  ylab("Democratic Share of 2-Party Vote") + xlab("Year") +
  labs(title = "Prez Voting By Wealth",
       subtitle = "2006-18 (GSS); $1m+ = ~top 10%") +
  theme(legend.position = "bottom") +
  theme_bw() +
  geom_hline(yintercept = 0.5, color = "purple") +
  scale_color_manual(name = "Wealth group",
                     values = c("darkgreen", "green2", "turquoise1", "orange", "purple", "red"), 
                     labels = c("$<0-40k\n(N=303,126,213)","$100-250k\n(N=495,278,486)",
                                "$1M+\n(N=52,34,96)","$250-500k\n(N=250,88,181)", 
                                "$40-100k\n(N=228,116,255)", "$500k-1M\n(N=136,49,109)")) +
  scale_y_continuous(breaks = seq(0, 0.9, by = 0.05), labels = scales::percent_format(accuracy = 1), 
                     limits = c(0.25, 0.8)) +
  scale_x_continuous(breaks = seq(2006, 2018, by = 4), limits = c(2006, 2018)) +
  annotate("text", x = 2007, y = .34, label = "2004, \nGore \n(v Bush)", size = 3) +
  annotate("text", x = 2014, y = .37, label = "2012, \nObama \n(v Romney)", size = 3) +
  annotate("text", x = 2017.5, y = .62, label = "2016, \nClinton \n(v Trump)", size = 3) 

ggsave(filename = "plot_wealth_prezALL.jpg", plot = plot_wealth_prezALL)

# 

############################ Figure 5 - GSS bottom 75% by occupation

gss_occup_CEOmanager_prezINClow <- gss_occupcats %>% filter(occ_new == "CEO/manager" & 
                                                              ((income16<23)|(income06<22)|
                                                                 (income98<21)|(income91<19)|
                                                                 (income86<18)|(income82==17|income82==17)|
                                                                 (income77<14)|(income72<10))) %>% 
  as_survey(weights = c(wtssall)) %>% group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_CEOmanager_prezINClow <- gss_occup_CEOmanager_prezINClow %>% 
  mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_bizfinance_prezINClow <- gss_occupcats %>% filter(occ_new == "business/finance" & 
                                                              ((income16<23)|(income06<22)|
                                                                 (income98<21)|(income91<19)|
                                                                 (income86<18)|(income82==17|income82==17)|
                                                                 (income77<14)|(income72<10))) %>% 
  as_survey(weights = c(wtssall)) %>%group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_bizfinance_prezINClow <- gss_occup_bizfinance_prezINClow %>% 
  mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_profsci_prezINClow <- gss_occupcats%>%filter(occ_new == "professional/scientific" & 
                                                         ((income16<23)|(income06<22)|
                                                            (income98<21)|(income91<19)|
                                                            (income86<18)|(income82==17|income82==17)|
                                                            (income77<14)|(income72<10)))%>%
  as_survey(weights = c(wtssall)) %>%group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_profsci_prezINClow <- gss_occup_profsci_prezINClow %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_humanarts_prezINClow <- gss_occupcats %>% filter(occ_new == "human services/arts" & 
                                                             ((income16<23)|(income06<22)|
                                                                (income98<21)|(income91<19)|
                                                                (income86<18)|(income82==17|income82==17)|
                                                                (income77<14)|(income72<10))) %>% 
  as_survey(weights = c(wtssall)) %>% group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_humanarts_prezINClow <- gss_occup_humanarts_prezINClow %>% mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_bluecollar_prezINClow <- gss_occupcats %>% filter(occ_new == "blue collar" & 
                                                              ((income16<23)|(income06<22)|
                                                                 (income98<21)|(income91<19)|
                                                                 (income86<18)|(income82==17|income82==17)|
                                                                 (income77<14)|(income72<10))) %>% as_survey(weights = c(wtssall)) %>%
  group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_bluecollar_prezINClow <- gss_occup_bluecollar_prezINClow %>%
  mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_adminservice_prezINClow <- gss_occupcats %>% filter(occ_new == "admin/other service" & 
                                                                ((income16<23)|(income06<22)|
                                                                   (income98<21)|(income91<19)|
                                                                   (income86<18)|(income82==17|income82==17)|
                                                                   (income77<14)|(income72<10)))%>%
  as_survey(weights = c(wtssall)) %>%group_by(year) %>%
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
gss_occup_adminservice_prezINClow <- gss_occup_adminservice_prezINClow %>% 
  mutate(prezDshare = mean_prez_D / (mean_prez_D + mean_prez_R))

gss_occup_CEOmanager_prezINC2low <- gss_occup_CEOmanager_prezINClow %>% mutate(group="CEOmanager")
gss_occup_bizfinance_prezINC2low <- gss_occup_bizfinance_prezINClow %>% mutate(group="bizfinance")
gss_occup_profsci_prezINC2low <- gss_occup_profsci_prezINClow %>% mutate(group="profsci")
gss_occup_humanarts_prezINC2low <- gss_occup_humanarts_prezINClow %>% mutate(group="humanarts")
gss_occup_bluecollar_prezINC2low <- gss_occup_bluecollar_prezINClow %>% mutate(group="bluecollar")
gss_occup_adminservice_prezINC2low <- gss_occup_adminservice_prezINClow %>% mutate(group="adminservice")
gss_occup_prez_ALLlow <- rbind(gss_occup_CEOmanager_prezINC2low, gss_occup_bizfinance_prezINC2low,
                               gss_occup_profsci_prezINC2low, gss_occup_humanarts_prezINC2low,
                               gss_occup_bluecollar_prezINC2low, gss_occup_adminservice_prezINC2low)

plot_GSSbyoccupation_prezINC_2getherLOW <- 
  ggplot(data = gss_occup_prez_ALLlow, mapping = aes(year, prezDshare, color = group)) +
  geom_line(data = gss_occup_prez_ALLlow, mapping = aes(year, prezDshare, color = group), alpha = 0.4) +
  geom_point(size = 1.0, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE) +
  ylab("Democratic Share of 2-Party Vote") + xlab("Year") +
  labs(title = "Democratic Prez Voting By Occupation Type",
       subtitle = "(GSS, 1972-2016, ONLY Bottom 75% by Income)") +
  theme(legend.position = "bottom") +
  theme_bw() +
  geom_hline(yintercept = 0.5, color = "purple") +
  scale_y_continuous(breaks = seq(0, 0.9, by = 0.05), labels = scales::percent_format(accuracy = 1), 
                     limits = c(0.15, 0.8)) +
  scale_x_continuous(breaks = seq(1972, 2016, by = 4), limits = c(1972, 2016)) +
  scale_color_manual(name = "Prez Vote\nby Occupation",
                     values = c("green", "gold", "purple", "black", "deeppink1", "turquoise2"), 
                     labels = c("Admin/other service", "Business/finance","Blue collar",
                                "CEO/manager", "Human services/arts",
                                "Professional/scientific")) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

ggsave(filename = "plot_GSSbyoccupation_prezINC_2getherLOW.jpg", plot = plot_GSSbyoccupation_prezINC_2getherLOW)

# 

############################ Figure 6 - ANES professional and managerial voting

ANES_profmanag_prez <- ANES_prez %>% filter(VCF0115 == '1. Professional and managerial') %>% 
  as_survey(weights = c(VCF0009z)) %>% 
  group_by(VCF0004) %>% 
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
ANES_profmanag_prez <- ANES_profmanag_prez %>% filter(mean_prez_R > 0)
# plot!
plot_ANES_both_profmanag_prez <- ggplot() +
  geom_line(data = ANES_profmanag_prez, mapping = aes(VCF0004, mean_prez_D, color = 'blue')) +
  geom_line(data = ANES_profmanag_prez, mapping = aes(VCF0004, mean_prez_R, color = 'red')) +
  geom_point(data = ANES_profmanag_prez, mapping = aes(VCF0004, mean_prez_D, color = 'blue')) +
  geom_point(data = ANES_profmanag_prez, mapping = aes(VCF0004, mean_prez_R, color = 'red')) +
  ylab("") + xlab("Year") +
  labs(title = "Professionals & Managers: Prez Voting, 1952-2004 (ANES)") +
  theme(legend.position = "bottom") +
  theme_bw() +
  geom_hline(yintercept = 0.5, color = "purple") +
  scale_y_continuous(breaks = seq(0, 0.8, by = 0.05), labels = scales::percent_format(accuracy=1), 
                     limits = c(0.3, 0.7)) +
  scale_x_continuous(breaks = seq(1956, 2004, by = 8)) +
  scale_color_identity(name = "Vote choice",
                       breaks = c("blue", "red"),
                       labels = c("Democrat", "Republican"),
                       guide = "legend")

ggsave("plot_ANES_both_profmanag_prez.jpg", plot_ANES_both_profmanag_prez)

# 

############################ Figure 7 - red v blue states 

cces_bystate <- cces %>% mutate(
  PID_D = ifelse(pid7 == 1 | pid7 == 2 | pid7 == 3, 1, 0),
  PID_R = ifelse(pid7 == 5 | pid7 == 6 | pid7 == 7, 1, 0),
  prez_D = ifelse(voted_pres_party == 1, 1, 0),
  prez_R = ifelse(voted_pres_party == 2, 1, 0)
)

Dsbystate_150kplus <- cces_bystate %>%
  filter(faminc == 12) %>% as_survey(weights = c(weight)) %>% group_by(state, year) %>% 
  summarize(mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))

Dsbystate_150kplus <- Dsbystate_150kplus %>% arrange(year)

x <- stack(attr(Dsbystate_150kplus$state, 'labels'))
x <- as.data.frame(x)
x$state <- x$values
Dsbystate_150kplus$state <- as.numeric(Dsbystate_150kplus$state)

Dsbystate_150kplus_new <- merge(Dsbystate_150kplus, x, by.x = "state")

Dsbystate_150kplus_new <- Dsbystate_150kplus_new %>% 
  filter(year == 2008 | year == 2010 | year == 2012 | year == 2014 | year == 2016 | year == 2018 | year == 2020)
Dsbystate_150kplus_new_smaller <- Dsbystate_150kplus_new %>% 
  filter(year == 2008 | year == 2012 | year == 2016 |  year == 2020)

# create red & blue
Dsbystate_150kplus_new_smaller_RedBlue <- Dsbystate_150kplus_new_smaller %>%
  group_by(state) %>% summarize(mean(mean_prez_D)) %>% mutate(D = ifelse(`mean(mean_prez_D)`>0.5, 1, 0))

Dsbystate_150kplus_new_smaller_merged <- Dsbystate_150kplus_new_smaller %>% 
  left_join(Dsbystate_150kplus_new_smaller_RedBlue, by = c("state"))

# plot!
Dsbystate_150kplus_new_smaller_merged_Rs <- Dsbystate_150kplus_new_smaller_merged %>% filter(D==0)
Dsbystate_150kplus_new_smaller_merged_Ds <- Dsbystate_150kplus_new_smaller_merged %>% filter(D==1)

fitlm_Rs = lm(mean_prez_D ~ year, data = Dsbystate_150kplus_new_smaller_merged_Rs)
Dsbystate_150kplus_new_smaller_merged_Rs$predlm = predict(fitlm_Rs)

fitlm_Ds = lm(mean_prez_D ~ year, data = Dsbystate_150kplus_new_smaller_merged_Ds)
Dsbystate_150kplus_new_smaller_merged_Ds$predlm = predict(fitlm_Ds)

plot_150kplusD_bystate <- 
  ggplot(data = Dsbystate_150kplus_new_smaller_merged, mapping = aes(x = year, y = mean_prez_D,color = as.factor(D)))+
  geom_point() +
  geom_line(data = Dsbystate_150kplus_new_smaller_merged_Rs, aes(y = predlm), size = 0.75, color = 'red') +
  geom_line(data = Dsbystate_150kplus_new_smaller_merged_Ds, aes(y = predlm), size = 0.75, color = 'blue') +
  ylab("Share of $150k+ voting D prez") + xlab("Year") +
  labs(title = "$150,000+ Family Income: Prez Voting, Red v. Blue States",
       subtitle = "Blue (Red) State: avg. 2008-20 D presidential vote share > (<) 50% (CCES)") +
  theme(legend.position = "bottom") +
  theme_bw() +
  geom_hline(yintercept = 0.5, color = "purple") +
  scale_y_continuous(breaks = seq(0, 1.0, by = 0.1), labels = scales::percent_format(accuracy=1), 
                     limits = c(0, 1.0)) +
  scale_x_continuous(breaks = seq(2008, 2020, by = 4), limits = c(2008, 2020)) +
  scale_color_manual(name = "State type", values = c("red", "blue"), labels = c("Republican", "Democratic"))

ggsave("plot_150kplusD_bystate.jpg", plot_150kplusD_bystate)

# 

############################ Figure 8 - $150k+ voting by industry type

cces_12_industry <- cces_12 %>% select(V103, pid7, faminc, CC410a, industryclass)
cces_12_industry <- cces_12_industry %>% mutate(prez_vote = ifelse(CC410a==2, 1,
                                                                   ifelse(CC410a==1, 2, 
                                                                          ifelse(CC410a==3, 3, NA))),
                                                year = 2012,
                                                industry2 = ifelse(industryclass>1 & industryclass<22, industryclass+2,
                                                                   ifelse(industryclass==1, 1, NA)))
cces_12_industry <- cces_12_industry %>% select(V103, pid7, faminc, prez_vote, industry2, year)
colnames(cces_12_industry) <- c("weight", "pid", "income", "prez_vote", "industry", "year")
cces_12_industry <- as.data.frame(cces_12_industry)

# 2016
cces_16_industry <- cces_16 %>% select(commonweight, pid7, faminc, CC16_410a, industryclass) %>% mutate(year = 2016)
colnames(cces_16_industry) <- c("weight", "pid", "income", "prez_vote", "industry", "year")
cces_16_industry <- as.data.frame(cces_16_industry)

# 2020
cces_20_industry <- cces_20 %>% select(commonweight, pid7, faminc_new, CC20_410, industryclass)
cces_20_industry <- cces_20_industry %>% mutate(prez_vote = ifelse(CC20_410==2, 1,
                                                                   ifelse(CC20_410==1, 2, NA)),
                                                year = 2020)
cces_20_industry <- cces_20_industry %>% select(commonweight, pid7, faminc_new, prez_vote, industryclass, year)
colnames(cces_20_industry) <- c("weight", "pid", "income", "prez_vote", "industry", "year")

# STACK!
cces_1220_industry <- bind_rows(cces_12_industry, cces_16_industry, cces_20_industry)

nrow(cces_1220_industry) # 218k
cces_1220_industry %>% filter(industry > 0) %>% nrow() # 190k
tail(cces_1220_industry)
head(cces_1220_industry)
cces_1220_industry <- as.data.frame(cces_1220_industry)

### group industries:
# X: ag / forestry / mining hunting / utilities / construction / manufacturing / waste man + remed / transpo / 2 trades
# Y: finance + insurance / real estate + rental + licensing / CEOs
# Z: education / healthcare social / professional / arts entertain / public admin
# B: professional
# A: administrative support / accomodation / information / other services

cces_1220_industry_types <- cces_1220_industry %>%
  mutate(industry_type = ifelse(industry<11 | industry==17, "Manual labor",
                                ifelse(industry==12|industry==13|industry==15, "FIRE/CEO",
                                       ifelse(industry==18|industry==19|industry==20|industry==23,
                                              "Educ./health/arts/govt",
                                              ifelse(industry==16|industry==21|industry==22|industry==11, "Service",
                                                     ifelse(industry==14, "Professional/scientific", NA))))))
na.omit(cces_1220_industry_types$industry_type)

### prez / PID:

cces_1220_industry_types_prez12 <- cces_1220_industry_types %>% filter(prez_vote>0 & prez_vote<3)

cces_1220_industry_types_prezPID <- cces_1220_industry_types_prez12 %>% filter(pid<8) %>%
  mutate(prez_D = ifelse(prez_vote==2, 1, 0),
         prez_R = ifelse(prez_vote==1, 1, 0),
         PID_D = ifelse(pid==1 | pid==2 | pid==3, 1, 0),
         PID_R = ifelse(pid==5 | pid==6 | pid==7, 1, 0))

### break out by income:

cces_1220_industry_types_prezPID_150k <- cces_1220_industry_types_prezPID %>% filter(income > 11 & income < 97)
nrow(cces_1220_industry_types_prezPID_150k) # 11k

# 150k+, all industry type categories:

cces_1220_industry_types_prezPID_150k_group <- cces_1220_industry_types_prezPID_150k %>% 
  as_survey(weights = c(weight))  %>% group_by(year, industry_type) %>% 
  summarize(mean_PID_D = survey_mean(PID_D, na.rm = T), mean_PID_R = survey_mean(PID_R, na.rm = T),
            mean_prez_D = survey_mean(prez_D, na.rm = T), mean_prez_R = survey_mean(prez_R, na.rm = T))
cces_1220_industry_types_prezPID_150k_group <- na.omit(cces_1220_industry_types_prezPID_150k_group)

# plot
plot_cces_1220_industry_types_prezPID_150k <- 
  ggplot() +
  geom_point(cces_1220_industry_types_prezPID_150k_group, mapping = aes(x = year, y = mean_prez_D, color = industry_type)) +
  geom_line(cces_1220_industry_types_prezPID_150k_group, mapping = aes(x = year, y = mean_prez_D, color = industry_type)) +
  labs(title = "$150,000+ Family Income: Prez Voting by Industry",
       subtitle = "CCES (2012-20)",
       y = "Democratic presidential vote share", x = "year") +
  theme_bw() +
  theme(legend.position = "bottom") +
  geom_hline(yintercept = 0.5, color = "purple") +
  scale_y_continuous(breaks = seq(0, 0.9, by = 0.05), limits = c(0.35, 0.75), labels = scales::percent_format(accuracy=1)) +
  guides(color=guide_legend(nrow=3, byrow=TRUE, title="Industry type")) 

ggsave("plot_cces_1220_industry_types_prezPID_150k.jpg", plot_cces_1220_industry_types_prezPID_150k)

# 



